9 Multivariate methods for heterogeneous data
## -----------------------------------------------------------------------------
library("pheatmap")
dir(system.file("extdata", package = "MSMB"))
## [1] "01126211_NB8_I025.fcs" "01247181_NB06_I025.fcs" "02276161_NB8_I025.fcs"
## [4] "03157141_NB06_I025.fcs" "05107251_NB06_I025.fcs" "06226101_NB8_I025.fcs"
## [7] "07115141_NB8_I025.fcs" "07215281_NB8_I025.fcs" "08045071_NB8_I025.fcs"
## [10] "09225121_NB8_I025.fcs" "10175181_NB8_I025.fcs" "10276181_NB8_I025.fcs"
## [13] "11145351_NB8_I025.fcs" "12015151_NB8_I025.fcs" "annotation.txt"
## [16] "PaintedTurtles.txt" "statecancerdeaths.csv"
dir(system.file("images", package = "MSMB"))
## [1] "hiv.png" "image-Cy3.tif" "image-DAPI.tif" "image-FITC.tif"
## [5] "mosquito.png"
dir(system.file("data", package = "MSMB"))
## [1] "brcalymphnode.RData" "datalist" "e100.RData"
## [4] "ukraine_coords.RData" "ukraine_dists.RData"
dir(system.file("scripts", package = "MSMB"))
## character(0)
data("ukraine_dists", package = "MSMB")
as.matrix(ukraine_dists)[1:4, 1:4]
## Kyiv Odesa Sevastopol Chernihiv
## Kyiv 0.0000 441.2548 687.7551 128.1287
## Odesa 441.2548 0.0000 301.7482 558.6483
## Sevastopol 687.7551 301.7482 0.0000 783.6561
## Chernihiv 128.1287 558.6483 783.6561 0.0000
## -----------------------------------------------------------------------------
pheatmap(as.matrix(ukraine_dists),
color = colorRampPalette(c("#0057b7", "#ffd700"))(50),
breaks = seq(0, max(ukraine_dists)^(1/2), length.out = 51)^2,
treeheight_row = 10, treeheight_col = 10)

## -----------------------------------------------------------------------------
pheatmap(as.matrix(ukraine_dists),
color = colorRampPalette(c("blue","white", "red"))(50),
breaks = seq(0, max(ukraine_dists)^(1/2), length.out = 51)^2,
treeheight_row = 10, treeheight_col = 10)

## -----------------------------------------------------------------------------
ukraine_mds = cmdscale(ukraine_dists, eig = TRUE)
ukraine_mds
## $points
## [,1] [,2]
## Kyiv 131.153351 169.874539
## Odesa 16.624277 -256.292319
## Sevastopol -245.755229 -405.370111
## Chernihiv 105.112948 295.355793
## Hostomel 152.902706 180.684913
## Kharkiv -275.252458 226.879572
## Kherson -120.217309 -204.620962
## Mariupol -466.241384 -50.420968
## Volnovakha -445.205338 1.530718
## Cherkasy -2.118748 86.947951
## Chernivtsi 410.954903 -131.132200
## Dnipro -241.445579 39.535552
## Donetsk -452.074679 52.713423
## Kramatorsk -411.493140 124.038010
## Ivano-Frankivsk 511.480496 -75.501010
## Izium -374.498185 164.904503
## Khmelnytskyi 357.335158 6.288698
## Kropyvnytskyi -42.284972 -10.063808
## Luhansk -536.915774 147.268066
## Lutsk 499.069258 131.831274
## Lviv 575.022730 18.679743
## Melitopol -317.100310 -126.114795
## Mykolaiv -64.292846 -180.495908
## Poltava -172.173947 149.369540
## Rivne 431.862640 128.771697
## Sumy -148.564160 295.679652
## Ternopil 458.969320 4.137619
## Uzhhorod 682.405840 -132.409076
## Vinnytsia 247.137629 6.186894
## Zaporizhzhia -266.264252 -24.722347
## Zhytomyr 255.669234 121.156771
## Simferopol -279.879385 -356.015502
## ostriv Zmiinyi 26.077203 -398.675921
##
## $eig
## [1] 3.913916e+06 1.084943e+06 3.419437e+02 4.842412e-01 2.129017e-01
## [6] 3.828931e-05 5.895169e-06 5.827564e-07 8.790838e-08 4.945342e-08
## [11] 7.060156e-10 3.426975e-10 2.363593e-10 5.733835e-11 4.074591e-11
## [16] 3.187820e-11 -1.794717e-11 -4.725759e-11 -6.066847e-11 -1.401687e-10
## [21] -1.462420e-10 -1.575528e-10 -3.028042e-10 -4.332777e-10 -4.804013e-10
## [26] -2.394962e-09 -1.512801e-08 -9.604281e-05 -2.505885e-04 -1.409065e-02
## [31] -1.190225e-01 -3.584671e+02 -8.851127e+02
##
## $x
## NULL
##
## $ac
## [1] 0
##
## $GOF
## [1] 0.9996828 0.9999315
seq(along = ukraine_mds$eig)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33
## -----------------------------------------------------------------------------
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
plotscree = function(x, m = length(x$eig)) {
ggplot(tibble(eig = x$eig[seq_len(m)], k = seq(along = eig)),
aes(x = k, y = eig)) + theme_minimal() +
scale_x_discrete("k", limits = as.factor(seq_len(m))) +
geom_bar(stat = "identity", width = 0.5, fill = "#ffd700", col = "#0057b7")
}
## -----------------------------------------------------------------------------
plotscree(ukraine_mds, m = 4)

## -----------------------------------------------------------------------------
ukraine_mds$eig |> signif(3)
## [1] 3.91e+06 1.08e+06 3.42e+02 4.84e-01 2.13e-01 3.83e-05 5.90e-06
## [8] 5.83e-07 8.79e-08 4.95e-08 7.06e-10 3.43e-10 2.36e-10 5.73e-11
## [15] 4.07e-11 3.19e-11 -1.79e-11 -4.73e-11 -6.07e-11 -1.40e-10 -1.46e-10
## [22] -1.58e-10 -3.03e-10 -4.33e-10 -4.80e-10 -2.39e-09 -1.51e-08 -9.60e-05
## [29] -2.51e-04 -1.41e-02 -1.19e-01 -3.58e+02 -8.85e+02
plotscree(ukraine_mds)

## -----------------------------------------------------------------------------
stopifnot(any(ukraine_mds$eig < 0))
## -----------------------------------------------------------------------------
ukraine_mds_df = tibble(
PCo1 = ukraine_mds$points[, 1],
PCo2 = ukraine_mds$points[, 2],
labs = rownames(ukraine_mds$points)
)
if (with(ukraine_mds_df, labs[which.max(PCo1)] != "Luhansk"))
ukraine_mds_df$PCo1 = -ukraine_mds_df$PCo1
if (with(ukraine_mds_df, labs[which.max(PCo2)] != "Sevastopol"))
ukraine_mds_df$PCo2 = -ukraine_mds_df$PCo2
if(with(ukraine_mds_df,
labs[which.max(PCo1)] != "Luhansk" ||
labs[which.max(PCo2)] != "Sevastopol"))
stop("There is an error with 'ukraine_mds_df'.")
library("ggrepel")
g = ggplot(ukraine_mds_df, aes(x = PCo1, y = PCo2, label = labs)) +
geom_point() + geom_text_repel(col = "#0057b7") + coord_fixed()
g

## -----------------------------------------------------------------------------
g %+% mutate(ukraine_mds_df, PCo1 = PCo1, PCo2 = -PCo2)

## -----------------------------------------------------------------------------
data("ukraine_coords", package = "MSMB")
print.data.frame(ukraine_coords[1:4, c("city", "lat", "lon")])
## city lat lon
## 1 Kyiv 50.45003 30.52414
## 2 Odesa 46.48430 30.73229
## 3 Sevastopol 44.60544 33.52208
## 4 Chernihiv 51.49410 31.29433
ggplot(ukraine_coords, aes(x = lon, y = lat, label = city)) +
geom_point() + geom_text_repel(col = "#0057b7")

## -----------------------------------------------------------------------------
earthradius = 6371
## -----------------------------------------------------------------------------
## library("sf")
## library("rnaturalearth")
## library("rnaturalearthdata")
## world = ne_countries(scale = "medium", returnclass = "sf")
## ggplot() +
## geom_sf(data = world,
## fill = ifelse(world$geounit == "Ukraine", "#ffd700", "#f0f0f0")) +
## coord_sf(xlim = range(ukraine_coords$lon) + c(-0.5, + 2),
## ylim = range(ukraine_coords$lat) + c(-0.5, + 1)) +
## geom_point(aes(x = lon, y = lat), data = ukraine_coords) +
## geom_text_repel(aes(x = lon, y = lat, label = city),
## color = "#0057b7", data = ukraine_coords)
## -----------------------------------------------------------------------------
library("sf")
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library("rnaturalearth")
library("rnaturalearthdata")
##
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
##
## countries110
world = ne_countries(scale = "medium", returnclass = "sf")
ggplot() +
geom_sf(data = world,
fill = ifelse(world$geounit == "Ukraine", "#ffd700", "#f0f0f0")) +
coord_sf(xlim = range(ukraine_coords$lon) + c(-0.5, + 2),
ylim = range(ukraine_coords$lat) + c(-0.5, + 1)) +
geom_point(aes(x = lon, y = lat), data = ukraine_coords) +
geom_text_repel(aes(x = lon, y = lat, label = city),
color = "#0057b7", data = ukraine_coords)

## -----------------------------------------------------------------------------
stopifnot(48 == round(mean(range(ukraine_coords$lat))))
## -----------------------------------------------------------------------------
X = with(ukraine_coords, cbind(lon, lat * cos(48)))
DdotD = as.matrix(dist(X)^2)
DdotD
## 1 2 3 4 5 6 7
## 1 0.000000 6.4880224 22.9856271 1.039897 0.078079 32.654314 10.3616001
## 2 6.488022 0.0000000 9.2295496 10.600698 7.125703 35.278880 3.5954616
## 3 22.985627 9.2295496 0.0000000 24.408633 25.315006 19.229610 2.5017166
## 4 1.039897 10.6006978 24.4086328 0.000000 1.407973 25.295041 11.4232240
## 5 0.078079 7.1257035 25.3150061 1.407973 0.000000 35.809417 11.9852323
## 6 32.654314 35.2788795 19.2296102 25.295041 35.809417 0.000000 17.5993187
## 7 10.361600 3.5954616 2.5017166 11.423224 11.985232 17.599319 0.0000000
## 8 53.972768 46.6338958 18.7651629 47.060335 58.155175 5.177725 24.3320810
## 9 51.992827 46.3085695 19.4934651 44.727463 56.091725 3.958303 24.1318623
## 10 2.769228 5.3511335 11.7381155 2.305343 3.774690 17.530386 3.5423098
## 11 22.954031 24.3194237 63.0761602 32.910235 20.845953 107.145729 45.8403927
## 12 22.018812 20.1842085 8.4232381 17.795757 24.716189 2.366426 7.2044150
## 13 55.385733 50.9327475 23.0782617 47.298755 59.597158 4.066677 27.5605946
## 14 51.046844 49.0342830 23.5037603 42.675351 55.061346 2.475354 26.3907422
## 15 34.756619 38.7002472 85.2844384 46.059139 31.925884 133.195415 64.7873242
## 16 46.269443 45.8547444 22.7280802 37.982124 50.070500 1.359925 24.3117095
## 17 13.000373 17.6151138 52.3043613 20.382276 11.316134 85.727149 35.0452806
## 18 4.574192 4.0336288 7.8279385 4.591068 5.795135 16.623990 1.5616478
## 19 78.414437 75.1452366 39.7997243 67.547454 83.355898 10.229200 46.0363062
## 20 27.117892 36.7313172 82.7197655 35.921616 24.403916 119.280738 60.2747731
## 21 42.304651 49.5191592 101.3061578 53.865935 39.009970 148.835175 78.0583059
## 22 28.926488 21.6804208 5.5204884 25.565543 31.988274 4.774306 7.6179846
## 23 7.106451 1.6908499 4.6376757 8.855028 8.357252 21.681212 0.4450834
## 24 16.517262 18.5328607 11.2386252 12.090665 18.827301 2.889551 7.2681010
## 25 18.268772 27.0867643 67.6860972 25.745375 16.062651 99.755626 47.1197473
## 26 18.394173 24.6023909 17.9382747 12.448015 20.687991 2.386461 12.2132951
## 27 24.654798 30.2896090 72.9301265 34.057498 22.219637 113.269151 52.9567180
## 28 68.968119 72.9386894 132.4966991 84.236831 64.894893 194.779354 108.1837383
## 29 4.835741 8.2209595 34.3155350 10.085170 3.961850 60.501664 20.0379322
## 30 23.874762 20.0021682 6.8638173 20.062002 26.682623 3.117502 6.8120113
## 31 3.455485 10.0975102 36.6518241 7.515411 2.571843 57.209844 21.0200913
## 32 25.191056 12.3202056 0.3861232 25.423348 27.788825 14.940535 3.3497843
## 33 11.163179 0.8992753 11.1870745 17.142752 11.659737 45.530470 6.6561563
## 8 9 10 11 12 13
## 1 53.9727683 51.9928268 2.7692280 22.9540306 22.0188120 55.3857335
## 2 46.6338958 46.3085695 5.3511335 24.3194237 20.1842085 50.9327475
## 3 18.7651629 19.4934651 11.7381155 63.0761602 8.4232381 23.0782617
## 4 47.0603353 44.7274629 2.3053433 32.9102352 17.7957569 47.2987549
## 5 58.1551751 56.0917253 3.7746903 20.8459526 24.7161886 59.5971583
## 6 5.1777254 3.9583027 17.5303861 107.1457286 2.3664261 4.0666770
## 7 24.3320810 24.1318623 3.5423098 45.8403927 7.2044150 27.5605946
## 8 0.0000000 0.1060518 32.4142324 135.4267024 7.0626837 0.4101164
## 9 0.1060518 0.0000000 31.0022246 133.8784130 6.3520337 0.1623058
## 10 32.4142324 31.0022246 0.0000000 38.0180085 9.2891976 33.8136937
## 11 135.4267024 133.8784130 38.0180085 0.0000000 82.8984697 140.7770862
## 12 7.0626837 6.3520337 9.2891976 82.8984697 0.0000000 7.6989994
## 13 0.4101164 0.1623058 33.8136937 140.7770862 7.6989994 0.0000000
## 14 1.1076157 0.5401810 30.7364262 135.7301685 6.4949432 0.2613170
## 15 166.2239096 164.2905812 54.1116632 1.6721332 106.8235561 171.7117011
## 16 1.8733377 1.0888513 27.2708746 128.9483727 5.2169786 0.8396915
## 17 113.9502215 112.0395063 25.8005757 1.6113875 65.3732534 117.9223526
## 18 28.7444513 27.7380380 0.4004237 40.0638505 7.7077111 30.7443997
## 19 3.9459223 3.6190559 52.7087524 178.5139119 18.1140620 2.3645432
## 20 155.0273544 152.4043049 46.1029508 2.8584371 96.6360353 158.8342094
## 21 185.8367374 183.4566102 64.5003925 4.6245529 121.9975855 190.9724268
## 22 4.7223181 4.7137610 13.8146448 90.0588669 1.1934114 6.4098252
## 23 30.8749774 30.4735925 2.5020857 37.3828173 10.2015158 34.1688289
## 24 11.5438346 10.3218652 6.2187455 74.8822351 0.7566720 11.5811090
## 25 132.7479104 130.2698752 34.2922323 2.3290794 79.1691345 136.1811840
## 26 13.5149439 11.7735098 8.4116112 81.4151014 2.5047313 12.4284445
## 27 145.4754513 143.3695970 41.8257719 0.7797699 89.7851853 150.0424988
## 28 233.4475309 231.3965135 95.4669178 13.2623426 162.3049849 240.3723337
## 29 84.3525712 82.6683411 12.9124360 6.7688995 43.4539812 87.7177864
## 30 6.1466459 5.6979458 10.4017779 84.3618171 0.1619747 7.2099473
## 31 82.9698040 80.8823184 11.7612388 9.0572990 41.9248674 85.4587660
## 32 13.7680957 14.4117366 12.4477618 71.2203632 5.9477644 17.5279469
## 33 55.3621479 55.4903188 10.6369416 21.9621310 27.6416578 60.8541043
## 14 15 16 17 18 19
## 1 51.0468436 34.7566185 46.2694425 13.000373 4.5741916 78.414437
## 2 49.0342830 38.7002472 45.8547444 17.615114 4.0336288 75.145237
## 3 23.5037603 85.2844384 22.7280802 52.304361 7.8279385 39.799724
## 4 42.6753513 46.0591391 37.9821242 20.382276 4.5910676 67.547454
## 5 55.0613465 31.9258842 50.0704997 11.316134 5.7951352 83.355898
## 6 2.4753537 133.1954152 1.3599252 85.727149 16.6239900 10.229200
## 7 26.3907422 64.7873242 24.3117095 35.045281 1.5616478 46.036306
## 8 1.1076157 166.2239096 1.8733377 113.950222 28.7444513 3.945922
## 9 0.5401810 164.2905812 1.0888513 112.039506 27.7380380 3.619056
## 10 30.7364262 54.1116632 27.2708746 25.800576 0.4004237 52.708752
## 11 135.7301685 1.6721332 128.9483727 1.611388 40.0638505 178.513912
## 12 6.4949432 106.8235561 5.2169786 65.373253 7.7077111 18.114062
## 13 0.2613170 171.7117011 0.8396915 117.922353 30.7443997 2.364543
## 14 0.0000000 165.7552932 0.1774971 112.655939 28.3105022 2.945604
## 15 165.7552932 0.0000000 157.9865986 5.249904 57.1522405 212.830899
## 16 0.1774971 157.9865986 0.0000000 106.091437 25.3179313 4.233319
## 17 112.6559395 5.2499042 106.0914373 0.000000 28.2830705 152.026178
## 18 28.3105022 57.1522405 25.3179313 28.283070 0.0000000 49.446153
## 19 2.9456036 212.8308993 4.2333189 152.026178 49.4461533 0.000000
## 20 152.0623384 1.7329850 143.9909793 3.473179 50.2867072 197.298787
## 21 184.1766497 0.8070818 175.6516938 8.762533 68.5257162 233.703500
## 22 6.3145013 115.6660533 5.8463546 73.328999 10.8507632 16.543333
## 23 32.5265313 54.6044000 29.9367918 27.593343 1.0389917 54.382512
## 24 9.4992741 97.0173969 7.5049307 57.338190 5.6992173 22.954155
## 25 129.8877406 3.5549081 122.4327600 1.120143 37.9946843 171.916694
## 26 9.6723868 103.4795178 7.3419552 62.118097 8.8002086 22.445250
## 27 144.0933542 0.9414863 136.6293157 1.932732 44.9864952 188.235620
## 28 233.5488913 5.8356794 224.4179084 22.135947 99.2738910 288.833062
## 29 83.2084900 14.1592319 77.6244859 2.230343 14.6354511 117.453280
## 30 6.4048687 108.7964824 5.4025969 67.250419 8.3160511 17.677257
## 31 80.4277143 16.4058690 74.5857569 3.144885 14.1879378 114.123887
## 32 17.9998359 94.6726029 17.4507685 58.917339 8.5629275 32.354939
## 33 59.4547080 35.6854054 56.4073614 17.501774 8.5969121 87.209365
## 20 21 22 23 24 25
## 1 27.1178918 42.3046509 28.9264882 7.1064514 16.5172625 18.2687722
## 2 36.7313172 49.5191592 21.6804208 1.6908499 18.5328607 27.0867643
## 3 82.7197655 101.3061578 5.5204884 4.6376757 11.2386252 67.6860972
## 4 35.9216163 53.8659352 25.5655426 8.8550276 12.0906650 25.7453751
## 5 24.4039163 39.0099702 31.9882737 8.3572517 18.8273009 16.0626514
## 6 119.2807381 148.8351746 4.7743059 21.6812117 2.8895513 99.7556263
## 7 60.2747731 78.0583059 7.6179846 0.4450834 7.2681010 47.1197473
## 8 155.0273544 185.8367374 4.7223181 30.8749774 11.5438346 132.7479104
## 9 152.4043049 183.4566102 4.7137610 30.4735925 10.3218652 130.2698752
## 10 46.1029508 64.5003925 13.8146448 2.5020857 6.2187455 34.2922323
## 11 2.8584371 4.6245529 90.0588669 37.3828173 74.8822351 2.3290794
## 12 96.6360353 121.9975855 1.1934114 10.2015158 0.7566720 79.1691345
## 13 158.8342094 190.9724268 6.4098252 34.1688289 11.5811090 136.1811840
## 14 152.0623384 184.1766497 6.3145013 32.5265313 9.4992741 129.8877406
## 15 1.7329850 0.8070818 115.6660533 54.6044000 97.0173969 3.5549081
## 16 143.9909793 175.6516938 5.8463546 29.9367918 7.5049307 122.4327600
## 17 3.4731794 8.7625333 73.3289991 27.5933431 57.3381897 1.1201430
## 18 50.2867072 68.5257162 10.8507632 1.0389917 5.6992173 37.9946843
## 19 197.2987871 233.7035003 16.5433329 54.3825123 22.9541549 171.9166940
## 20 0.0000000 1.9944279 107.4844698 50.3625840 85.7531091 0.8736548
## 21 1.9944279 0.0000000 132.5246212 66.7655744 110.6796917 5.1749993
## 22 107.4844698 132.5246212 0.0000000 11.4905380 3.7753888 89.2158432
## 23 50.3625840 66.7655744 11.4905380 0.0000000 9.3371728 38.4187253
## 24 85.7531091 110.6796917 3.7753888 9.3371728 0.0000000 69.3159754
## 25 0.8736548 5.1749993 89.2158432 38.4187253 69.3159754 0.0000000
## 26 89.9329066 116.4875079 7.1085601 14.2381887 0.7799218 73.1624224
## 27 0.6534950 2.4680781 98.8679685 43.7141373 80.2625198 0.8986299
## 28 10.9536753 3.6001028 172.3907456 95.0401638 150.4101585 17.2296970
## 29 10.8473929 19.8339421 50.1453244 14.5185172 37.0531360 5.7025898
## 30 99.4376196 124.5394882 0.4830476 10.0750633 1.5612237 81.7647462
## 31 11.3133377 21.5792844 49.8446909 15.4731461 34.7767520 5.8993729
## 32 90.8822475 111.2209374 3.1099167 6.1240960 9.0143452 74.8032424
## 33 36.1980833 46.7127210 27.8648792 4.4202551 26.6011858 27.4123278
## 26 27 28 29 30 31
## 1 18.3941726 24.6547978 68.968119 4.8357414 23.8747620 3.4554849
## 2 24.6023909 30.2896090 72.938689 8.2209595 20.0021682 10.0975102
## 3 17.9382747 72.9301265 132.496699 34.3155350 6.8638173 36.6518241
## 4 12.4480152 34.0574981 84.236831 10.0851702 20.0620022 7.5154114
## 5 20.6879914 22.2196370 64.894893 3.9618499 26.6826232 2.5718425
## 6 2.3864611 113.2691512 194.779354 60.5016636 3.1175020 57.2098435
## 7 12.2132951 52.9567180 108.183738 20.0379322 6.8120113 21.0200913
## 8 13.5149439 145.4754513 233.447531 84.3525712 6.1466459 82.9698040
## 9 11.7735098 143.3695970 231.396514 82.6683411 5.6979458 80.8823184
## 10 8.4116112 41.8257719 95.466918 12.9124360 10.4017779 11.7612388
## 11 81.4151014 0.7797699 13.262343 6.7688995 84.3618171 9.0572990
## 12 2.5047313 89.7851853 162.304985 43.4539812 0.1619747 41.9248674
## 13 12.4284445 150.0424988 240.372334 87.7177864 7.2099473 85.4587660
## 14 9.6723868 144.0933542 233.548891 83.2084900 6.4048687 80.4277143
## 15 103.4795178 0.9414863 5.835679 14.1592319 108.7964824 16.4058690
## 16 7.3419552 136.6293157 224.417908 77.6244859 5.4025969 74.5857569
## 17 62.1180974 1.9327317 22.135947 2.2303427 67.2504188 3.1448851
## 18 8.8002086 44.9864952 99.273891 14.6354511 8.3160511 14.1879378
## 19 22.4452504 188.2356198 288.833062 117.4532801 17.6772572 114.1238872
## 20 89.9329066 0.6534950 10.953675 10.8473929 99.4376196 11.3133377
## 21 116.4875079 2.4680781 3.600103 19.8339421 124.5394882 21.5792844
## 22 7.1085601 98.8679685 172.390746 50.1453244 0.4830476 49.8446909
## 23 14.2381887 43.7141373 95.040164 14.5185172 10.0750633 15.4731461
## 24 0.7799218 80.2625198 150.410158 37.0531360 1.5612237 34.7767520
## 25 73.1624224 0.8986299 17.229697 5.7025898 81.7647462 5.8993729
## 26 0.0000000 85.5941413 158.411095 41.2861801 3.9395990 37.7945660
## 27 85.5941413 0.0000000 11.178677 8.3148406 91.9435449 9.6732033
## 28 158.4110950 11.1786775 0.000000 38.1683822 164.4945840 41.6371450
## 29 41.2861801 8.3148406 38.168382 0.0000000 45.0084319 0.4734023
## 30 3.9395990 91.9435449 164.494584 45.0084319 0.0000000 43.9684572
## 31 37.7945660 9.6732033 41.637145 0.4734023 43.9684572 0.0000000
## 32 15.0457895 81.1150292 144.765436 39.2538584 4.4749098 41.0644749
## 33 34.2696580 28.8455288 67.073765 9.4934344 26.9186055 12.6185935
## 32 33
## 1 25.1910564 11.1631791
## 2 12.3202056 0.8992753
## 3 0.3861232 11.1870745
## 4 25.4233484 17.1427515
## 5 27.7888252 11.6597366
## 6 14.9405351 45.5304696
## 7 3.3497843 6.6561563
## 8 13.7680957 55.3621479
## 9 14.4117366 55.4903188
## 10 12.4477618 10.6369416
## 11 71.2203632 21.9621310
## 12 5.9477644 27.6416578
## 13 17.5279469 60.8541043
## 14 17.9998359 59.4547080
## 15 94.6726029 35.6854054
## 16 17.4507685 56.4073614
## 17 58.9173386 17.5017736
## 18 8.5629275 8.5969121
## 19 32.3549392 87.2093651
## 20 90.8822475 36.1980833
## 21 111.2209374 46.7127210
## 22 3.1099167 27.8648792
## 23 6.1240960 4.4202551
## 24 9.0143452 26.6011858
## 25 74.8032424 27.4123278
## 26 15.0457895 34.2696580
## 27 81.1150292 28.8455288
## 28 144.7654360 67.0737651
## 29 39.2538584 9.4934344
## 30 4.4749098 26.9186055
## 31 41.0644749 12.6185935
## 32 0.0000000 15.2411299
## 33 15.2411299 0.0000000
apply(X,2,mean)
## lon
## 31.69643 -31.11060
X
## lon
## [1,] 30.52414 -32.29530
## [2,] 30.73229 -29.75666
## [3,] 33.52208 -28.55392
## [4,] 31.29433 -32.96366
## [5,] 30.25909 -32.38379
## [6,] 36.23101 -32.00230
## [7,] 32.62579 -29.85714
## [8,] 37.54996 -30.14809
## [9,] 37.49985 -30.46986
## [10,] 32.05878 -31.65180
## [11,] 25.93765 -30.91031
## [12,] 35.04177 -31.02653
## [13,] 37.80134 -30.73709
## [14,] 37.58438 -31.19996
## [15,] 24.71032 -31.31748
## [16,] 37.27841 -31.48958
## [17,] 26.97938 -31.63570
## [18,] 32.26563 -31.05377
## [19,] 39.29732 -31.09290
## [20,] 25.32008 -32.48417
## [21,] 24.03159 -31.90604
## [22,] 35.38273 -29.98867
## [23,] 31.99397 -30.07133
## [24,] 34.55079 -31.74459
## [25,] 26.25132 -32.40386
## [26,] 34.80277 -32.59101
## [27,] 25.59189 -31.72285
## [28,] 22.30226 -31.12534
## [29,] 28.46797 -31.51560
## [30,] 35.11829 -30.63141
## [31,] 28.66923 -32.17355
## [32,] 34.10249 -28.77586
## [33,] 30.20331 -28.96961
sweep(X,2,apply(X,2,mean)) # Return an array obtained from an input array by sweeping out a summary statistic.
## lon
## [1,] -1.1722946 -1.18470529
## [2,] -0.9641429 1.35393515
## [3,] 1.8256535 2.55667604
## [4,] -0.4020987 -1.85305785
## [5,] -1.4373407 -1.27319014
## [6,] 4.5345839 -0.89170131
## [7,] 0.9293633 1.25345675
## [8,] 5.8535314 0.96251089
## [9,] 5.8034204 0.64073357
## [10,] 0.3623498 -0.54120352
## [11,] -5.7587775 0.20028757
## [12,] 3.3453404 0.08406815
## [13,] 6.1049100 0.37350736
## [14,] 5.8879505 -0.08935937
## [15,] -6.9861119 -0.20687764
## [16,] 5.5819818 -0.37898026
## [17,] -4.7170514 -0.52510492
## [18,] 0.5691976 0.05682463
## [19,] 7.6008846 0.01769395
## [20,] -6.3763527 -1.37357329
## [21,] -7.6648386 -0.79544530
## [22,] 3.6862974 1.12193122
## [23,] 0.2975359 1.03926631
## [24,] 2.8543641 -0.63399469
## [25,] -5.4451142 -1.29326347
## [26,] 3.1063416 -1.48041607
## [27,] -6.1045447 -0.61224854
## [28,] -9.3941738 -0.01473883
## [29,] -3.2284557 -0.40499835
## [30,] 3.4218560 0.47918841
## [31,] -3.0271962 -1.06294741
## [32,] 2.4060551 2.33473640
## [33,] -1.4931233 2.14098983
n=4
diag(rep(1,n))-(1/n)
## [,1] [,2] [,3] [,4]
## [1,] 0.75 -0.25 -0.25 -0.25
## [2,] -0.25 0.75 -0.25 -0.25
## [3,] -0.25 -0.25 0.75 -0.25
## [4,] -0.25 -0.25 -0.25 0.75
matrix(1, nrow = n, ncol = n)
## [,1] [,2] [,3] [,4]
## [1,] 1 1 1 1
## [2,] 1 1 1 1
## [3,] 1 1 1 1
## [4,] 1 1 1 1
diag(rep(1,n))-(1/n) * matrix(1, nrow = n, ncol = n)
## [,1] [,2] [,3] [,4]
## [1,] 0.75 -0.25 -0.25 -0.25
## [2,] -0.25 0.75 -0.25 -0.25
## [3,] -0.25 -0.25 0.75 -0.25
## [4,] -0.25 -0.25 -0.25 0.75
## -----------------------------------------------------------------------------
n = nrow(X)
H = diag(rep(1,n))-(1/n) * matrix(1, nrow = n, ncol = n)
Xc = sweep(X,2,apply(X,2,mean))
Xc[1:2, ]
## lon
## [1,] -1.1722946 -1.184705
## [2,] -0.9641429 1.353935
HX = H %*% X
HX[1:2, ]
## lon
## [1,] -1.1722946 -1.184705
## [2,] -0.9641429 1.353935
apply(HX, 2, mean)
## lon
## -1.773094e-15 2.227188e-15
## -----------------------------------------------------------------------------
B0 = H %*% DdotD %*% H
B2 = HX %*% t(HX)
B2[1:3, 1:3] / B0[1:3, 1:3]
## [,1] [,2] [,3]
## [1,] -0.5 -0.5 -0.5
## [2,] -0.5 -0.5 -0.5
## [3,] -0.5 -0.5 -0.5
max(abs(-0.5 * B0 - B2))
## [1] 1.207923e-13
## -----------------------------------------------------------------------------
ekm = read.table("../data/ekman.txt", header=TRUE)
rownames(ekm) = colnames(ekm)
disekm = 1 - ekm - diag(1, ncol(ekm))
disekm[1:5, 1:5]
## w434 w445 w465 w472 w490
## w434 0.00 0.14 0.58 0.58 0.82
## w445 0.14 0.00 0.50 0.56 0.78
## w465 0.58 0.50 0.00 0.19 0.53
## w472 0.58 0.56 0.19 0.00 0.46
## w490 0.82 0.78 0.53 0.46 0.00
disekm = as.dist(disekm)
## -----------------------------------------------------------------------------
mdsekm = cmdscale(disekm, eig = TRUE)
plotscree(mdsekm)

## -----------------------------------------------------------------------------
dfekm = mdsekm$points[, 1:2] |>
`colnames<-`(paste0("MDS", 1:2)) |>
as_tibble() |>
mutate(
name = rownames(ekm),
rgb = photobiology::w_length2rgb(as.numeric(sub("w", "", name))))
ggplot(dfekm, aes(x = MDS1, y = MDS2)) +
geom_point(col = dfekm$rgb, size = 4) +
geom_text_repel(aes(label = name)) + coord_fixed()

dim(stress)
## [1] 100 4
stress
## [,1] [,2] [,3] [,4]
## [1,] 0.2564344 0.02310251 0.01244422 0.003105991
## [2,] 0.2564337 0.02310251 0.01244420 0.002708738
## [3,] 0.2567348 0.02310251 0.01244422 0.002651645
## [4,] 0.2568435 0.02310251 0.01244414 0.002867260
## [5,] 0.2564709 0.02310251 0.01244451 0.003273426
## [6,] 0.2684059 0.02310251 0.01244596 0.003217022
## [7,] 0.2564709 0.02310251 0.01244421 0.002744531
## [8,] 0.2660000 0.02310251 0.01244449 0.003343448
## [9,] 0.2568456 0.02310251 0.01253515 0.003606675
## [10,] 0.2563815 0.02310251 0.01244423 0.003507884
## [11,] 0.2721258 0.02310251 0.01244467 0.002898068
## [12,] 0.2705604 0.02310251 0.01244417 0.003083802
## [13,] 0.2564337 0.02310251 0.01244432 0.003718418
## [14,] 0.2564337 0.02310251 0.01244449 0.002763495
## [15,] 0.2567348 0.02310251 0.01244427 0.003088338
## [16,] 0.2567348 0.02310251 0.01244414 0.003487105
## [17,] 0.2665161 0.02310251 0.01244427 0.002644791
## [18,] 0.2665161 0.02310251 0.01244641 0.002798770
## [19,] 0.2563948 0.02310251 0.01244551 0.002673181
## [20,] 0.2671614 0.02310251 0.01244429 0.003172974
## [21,] 0.2721258 0.02310251 0.01250509 0.002742815
## [22,] 0.2564344 0.02310251 0.01244417 0.002913404
## [23,] 0.2564337 0.02310251 0.01244415 0.002859349
## [24,] 0.2664448 0.02310251 0.01244423 0.002947876
## [25,] 0.2563948 0.02310251 0.01244431 0.002696052
## [26,] 0.2563815 0.02310251 0.01249460 0.002679122
## [27,] 0.2564709 0.02310251 0.01244416 0.003006277
## [28,] 0.2564344 0.02310251 0.01244421 0.002975636
## [29,] 0.2563948 0.02310251 0.01244426 0.002665067
## [30,] 0.2688540 0.02310251 0.01244428 0.002839643
## [31,] 0.2567348 0.02310251 0.01244419 0.002837803
## [32,] 0.2563815 0.02310251 0.01244431 0.002666064
## [33,] 0.2564344 0.02310251 0.01244417 0.002641424
## [34,] 0.2646034 0.02310251 0.01244430 0.002726495
## [35,] 0.2567622 0.02310251 0.01244429 0.003056776
## [36,] 0.2567348 0.02310251 0.01253515 0.002621917
## [37,] 0.2564709 0.02310251 0.01244419 0.002771835
## [38,] 0.2599293 0.02310251 0.01244458 0.003328291
## [39,] 0.2566013 0.02310251 0.01244436 0.002740097
## [40,] 0.2564337 0.02310251 0.01244450 0.002620992
## [41,] 0.2563948 0.02310251 0.01244433 0.003000842
## [42,] 0.2645525 0.02310251 0.01244497 0.002819788
## [43,] 0.2567348 0.02310251 0.01244552 0.003533963
## [44,] 0.2567495 0.02310251 0.01244425 0.002795333
## [45,] 0.2599882 0.02310251 0.01244422 0.002757198
## [46,] 0.2566766 0.02310251 0.01244424 0.002839389
## [47,] 0.2567348 0.02310251 0.01244422 0.002838302
## [48,] 0.2568435 0.02310251 0.01253479 0.003294939
## [49,] 0.2564337 0.02310251 0.01244430 0.003597160
## [50,] 0.2563948 0.02310251 0.01244435 0.002840022
## [51,] 0.2564337 0.02310251 0.01244424 0.003496745
## [52,] 0.2563815 0.02310251 0.01244423 0.002670635
## [53,] 0.2690747 0.02310251 0.01244422 0.002911839
## [54,] 0.2671614 0.02310251 0.01244429 0.003393294
## [55,] 0.2564337 0.02310251 0.01244422 0.002652747
## [56,] 0.2564337 0.02310251 0.01244420 0.002800906
## [57,] 0.2564344 0.02310251 0.01244424 0.002836812
## [58,] 0.2564337 0.02310251 0.01244421 0.003562442
## [59,] 0.2645525 0.02310251 0.01244421 0.003407879
## [60,] 0.2566013 0.02310251 0.01244418 0.002753160
## [61,] 0.2563815 0.02310251 0.01244494 0.002595412
## [62,] 0.2563815 0.02310251 0.01244429 0.003498198
## [63,] 0.2671614 0.02310251 0.01244431 0.003018693
## [64,] 0.2563948 0.02310251 0.01244420 0.003247601
## [65,] 0.2563948 0.02310251 0.01244421 0.003246640
## [66,] 0.2688540 0.02310251 0.01244435 0.003143784
## [67,] 0.2567622 0.02310251 0.01244415 0.002851555
## [68,] 0.2567622 0.02310251 0.01244417 0.003406754
## [69,] 0.2567622 0.02310251 0.01244422 0.002756205
## [70,] 0.2567622 0.02310251 0.01244431 0.002790451
## [71,] 0.2567622 0.02310251 0.01244429 0.003356577
## [72,] 0.2680470 0.02310251 0.01244421 0.002696757
## [73,] 0.2601376 0.02310251 0.01244419 0.002748391
## [74,] 0.2563948 0.02310251 0.01244421 0.002657489
## [75,] 0.2646034 0.02310251 0.01244428 0.003284028
## [76,] 0.2563815 0.02310251 0.01244423 0.003311805
## [77,] 0.2563948 0.02310251 0.01244421 0.002780885
## [78,] 0.2564709 0.02310251 0.01244423 0.003265715
## [79,] 0.2668051 0.02310251 0.01244433 0.003100225
## [80,] 0.2563815 0.02310251 0.01245065 0.002676644
## [81,] 0.2648963 0.02310251 0.01253469 0.002734583
## [82,] 0.2665161 0.02310251 0.01244411 0.002718548
## [83,] 0.2568435 0.02310251 0.01244419 0.002737317
## [84,] 0.2565841 0.02310251 0.01244414 0.003107693
## [85,] 0.2599423 0.02310251 0.01244413 0.002776348
## [86,] 0.2563948 0.02310251 0.01245989 0.002694656
## [87,] 0.2564709 0.02310251 0.01244487 0.003266339
## [88,] 0.2564344 0.02310251 0.01245031 0.002951356
## [89,] 0.2567348 0.02310251 0.01244414 0.003240187
## [90,] 0.2563815 0.02310251 0.01244450 0.003606639
## [91,] 0.2567622 0.02310251 0.01244414 0.002876276
## [92,] 0.2618162 0.02310251 0.01244474 0.002713966
## [93,] 0.2563948 0.02310251 0.01244438 0.002753725
## [94,] 0.2675244 0.02310251 0.01244426 0.003497540
## [95,] 0.2564709 0.02310251 0.01244422 0.002667266
## [96,] 0.2564709 0.02310251 0.01244442 0.002967706
## [97,] 0.2567495 0.02310251 0.01244416 0.003459355
## [98,] 0.2669988 0.02310251 0.01244422 0.002875183
## [99,] 0.2567348 0.02310251 0.01244435 0.002643288
## [100,] 0.2563815 0.02310251 0.01244425 0.003239758
## -----------------------------------------------------------------------------
dfstr = reshape2::melt(stress, varnames = c("replicate","dimensions"))
dfstr
## replicate dimensions value
## 1 1 1 0.256434433
## 2 2 1 0.256433681
## 3 3 1 0.256734766
## 4 4 1 0.256843486
## 5 5 1 0.256470858
## 6 6 1 0.268405905
## 7 7 1 0.256470858
## 8 8 1 0.265999985
## 9 9 1 0.256845647
## 10 10 1 0.256381465
## 11 11 1 0.272125774
## 12 12 1 0.270560362
## 13 13 1 0.256433681
## 14 14 1 0.256433681
## 15 15 1 0.256734766
## 16 16 1 0.256734766
## 17 17 1 0.266516103
## 18 18 1 0.266516103
## 19 19 1 0.256394793
## 20 20 1 0.267161436
## 21 21 1 0.272125774
## 22 22 1 0.256434433
## 23 23 1 0.256433681
## 24 24 1 0.266444849
## 25 25 1 0.256394793
## 26 26 1 0.256381465
## 27 27 1 0.256470858
## 28 28 1 0.256434433
## 29 29 1 0.256394793
## 30 30 1 0.268853953
## 31 31 1 0.256734766
## 32 32 1 0.256381465
## 33 33 1 0.256434433
## 34 34 1 0.264603406
## 35 35 1 0.256762244
## 36 36 1 0.256734766
## 37 37 1 0.256470858
## 38 38 1 0.259929280
## 39 39 1 0.256601284
## 40 40 1 0.256433681
## 41 41 1 0.256394793
## 42 42 1 0.264552504
## 43 43 1 0.256734766
## 44 44 1 0.256749537
## 45 45 1 0.259988227
## 46 46 1 0.256676573
## 47 47 1 0.256734766
## 48 48 1 0.256843486
## 49 49 1 0.256433681
## 50 50 1 0.256394793
## 51 51 1 0.256433681
## 52 52 1 0.256381465
## 53 53 1 0.269074711
## 54 54 1 0.267161436
## 55 55 1 0.256433681
## 56 56 1 0.256433681
## 57 57 1 0.256434433
## 58 58 1 0.256433681
## 59 59 1 0.264552504
## 60 60 1 0.256601284
## 61 61 1 0.256381465
## 62 62 1 0.256381465
## 63 63 1 0.267161436
## 64 64 1 0.256394793
## 65 65 1 0.256394793
## 66 66 1 0.268853953
## 67 67 1 0.256762244
## 68 68 1 0.256762244
## 69 69 1 0.256762244
## 70 70 1 0.256762244
## 71 71 1 0.256762244
## 72 72 1 0.268046999
## 73 73 1 0.260137582
## 74 74 1 0.256394793
## 75 75 1 0.264603406
## 76 76 1 0.256381465
## 77 77 1 0.256394793
## 78 78 1 0.256470858
## 79 79 1 0.266805072
## 80 80 1 0.256381465
## 81 81 1 0.264896306
## 82 82 1 0.266516103
## 83 83 1 0.256843486
## 84 84 1 0.256584099
## 85 85 1 0.259942331
## 86 86 1 0.256394793
## 87 87 1 0.256470858
## 88 88 1 0.256434433
## 89 89 1 0.256734766
## 90 90 1 0.256381470
## 91 91 1 0.256762244
## 92 92 1 0.261816201
## 93 93 1 0.256394793
## 94 94 1 0.267524440
## 95 95 1 0.256470858
## 96 96 1 0.256470858
## 97 97 1 0.256749537
## 98 98 1 0.266998761
## 99 99 1 0.256734766
## 100 100 1 0.256381465
## 101 1 2 0.023102506
## 102 2 2 0.023102506
## 103 3 2 0.023102506
## 104 4 2 0.023102506
## 105 5 2 0.023102506
## 106 6 2 0.023102506
## 107 7 2 0.023102506
## 108 8 2 0.023102506
## 109 9 2 0.023102506
## 110 10 2 0.023102506
## 111 11 2 0.023102506
## 112 12 2 0.023102506
## 113 13 2 0.023102506
## 114 14 2 0.023102506
## 115 15 2 0.023102506
## 116 16 2 0.023102506
## 117 17 2 0.023102506
## 118 18 2 0.023102506
## 119 19 2 0.023102506
## 120 20 2 0.023102506
## 121 21 2 0.023102506
## 122 22 2 0.023102506
## 123 23 2 0.023102506
## 124 24 2 0.023102506
## 125 25 2 0.023102506
## 126 26 2 0.023102506
## 127 27 2 0.023102506
## 128 28 2 0.023102506
## 129 29 2 0.023102506
## 130 30 2 0.023102506
## 131 31 2 0.023102506
## 132 32 2 0.023102506
## 133 33 2 0.023102506
## 134 34 2 0.023102506
## 135 35 2 0.023102506
## 136 36 2 0.023102506
## 137 37 2 0.023102506
## 138 38 2 0.023102506
## 139 39 2 0.023102506
## 140 40 2 0.023102506
## 141 41 2 0.023102506
## 142 42 2 0.023102506
## 143 43 2 0.023102506
## 144 44 2 0.023102506
## 145 45 2 0.023102506
## 146 46 2 0.023102506
## 147 47 2 0.023102506
## 148 48 2 0.023102506
## 149 49 2 0.023102506
## 150 50 2 0.023102506
## 151 51 2 0.023102506
## 152 52 2 0.023102506
## 153 53 2 0.023102506
## 154 54 2 0.023102506
## 155 55 2 0.023102506
## 156 56 2 0.023102506
## 157 57 2 0.023102506
## 158 58 2 0.023102506
## 159 59 2 0.023102506
## 160 60 2 0.023102506
## 161 61 2 0.023102506
## 162 62 2 0.023102506
## 163 63 2 0.023102506
## 164 64 2 0.023102506
## 165 65 2 0.023102506
## 166 66 2 0.023102506
## 167 67 2 0.023102506
## 168 68 2 0.023102506
## 169 69 2 0.023102506
## 170 70 2 0.023102506
## 171 71 2 0.023102506
## 172 72 2 0.023102506
## 173 73 2 0.023102506
## 174 74 2 0.023102506
## 175 75 2 0.023102506
## 176 76 2 0.023102506
## 177 77 2 0.023102506
## 178 78 2 0.023102506
## 179 79 2 0.023102506
## 180 80 2 0.023102506
## 181 81 2 0.023102506
## 182 82 2 0.023102506
## 183 83 2 0.023102506
## 184 84 2 0.023102506
## 185 85 2 0.023102506
## 186 86 2 0.023102506
## 187 87 2 0.023102506
## 188 88 2 0.023102506
## 189 89 2 0.023102506
## 190 90 2 0.023102506
## 191 91 2 0.023102506
## 192 92 2 0.023102506
## 193 93 2 0.023102506
## 194 94 2 0.023102506
## 195 95 2 0.023102506
## 196 96 2 0.023102506
## 197 97 2 0.023102506
## 198 98 2 0.023102506
## 199 99 2 0.023102506
## 200 100 2 0.023102506
## 201 1 3 0.012444222
## 202 2 3 0.012444203
## 203 3 3 0.012444224
## 204 4 3 0.012444136
## 205 5 3 0.012444513
## 206 6 3 0.012445962
## 207 7 3 0.012444207
## 208 8 3 0.012444495
## 209 9 3 0.012535153
## 210 10 3 0.012444226
## 211 11 3 0.012444667
## 212 12 3 0.012444170
## 213 13 3 0.012444316
## 214 14 3 0.012444494
## 215 15 3 0.012444267
## 216 16 3 0.012444144
## 217 17 3 0.012444273
## 218 18 3 0.012446411
## 219 19 3 0.012445505
## 220 20 3 0.012444290
## 221 21 3 0.012505088
## 222 22 3 0.012444174
## 223 23 3 0.012444147
## 224 24 3 0.012444235
## 225 25 3 0.012444314
## 226 26 3 0.012494603
## 227 27 3 0.012444164
## 228 28 3 0.012444210
## 229 29 3 0.012444265
## 230 30 3 0.012444282
## 231 31 3 0.012444193
## 232 32 3 0.012444305
## 233 33 3 0.012444172
## 234 34 3 0.012444302
## 235 35 3 0.012444290
## 236 36 3 0.012535153
## 237 37 3 0.012444194
## 238 38 3 0.012444575
## 239 39 3 0.012444362
## 240 40 3 0.012444505
## 241 41 3 0.012444328
## 242 42 3 0.012444969
## 243 43 3 0.012445516
## 244 44 3 0.012444249
## 245 45 3 0.012444224
## 246 46 3 0.012444241
## 247 47 3 0.012444216
## 248 48 3 0.012534792
## 249 49 3 0.012444303
## 250 50 3 0.012444353
## 251 51 3 0.012444243
## 252 52 3 0.012444227
## 253 53 3 0.012444225
## 254 54 3 0.012444290
## 255 55 3 0.012444218
## 256 56 3 0.012444199
## 257 57 3 0.012444236
## 258 58 3 0.012444208
## 259 59 3 0.012444211
## 260 60 3 0.012444176
## 261 61 3 0.012444943
## 262 62 3 0.012444294
## 263 63 3 0.012444314
## 264 64 3 0.012444197
## 265 65 3 0.012444210
## 266 66 3 0.012444347
## 267 67 3 0.012444153
## 268 68 3 0.012444173
## 269 69 3 0.012444221
## 270 70 3 0.012444309
## 271 71 3 0.012444287
## 272 72 3 0.012444206
## 273 73 3 0.012444190
## 274 74 3 0.012444214
## 275 75 3 0.012444281
## 276 76 3 0.012444229
## 277 77 3 0.012444214
## 278 78 3 0.012444229
## 279 79 3 0.012444328
## 280 80 3 0.012450653
## 281 81 3 0.012534687
## 282 82 3 0.012444111
## 283 83 3 0.012444186
## 284 84 3 0.012444135
## 285 85 3 0.012444135
## 286 86 3 0.012459894
## 287 87 3 0.012444871
## 288 88 3 0.012450311
## 289 89 3 0.012444138
## 290 90 3 0.012444505
## 291 91 3 0.012444143
## 292 92 3 0.012444738
## 293 93 3 0.012444378
## 294 94 3 0.012444263
## 295 95 3 0.012444222
## 296 96 3 0.012444422
## 297 97 3 0.012444161
## 298 98 3 0.012444218
## 299 99 3 0.012444349
## 300 100 3 0.012444254
## 301 1 4 0.003105991
## 302 2 4 0.002708738
## 303 3 4 0.002651645
## 304 4 4 0.002867260
## 305 5 4 0.003273426
## 306 6 4 0.003217022
## 307 7 4 0.002744531
## 308 8 4 0.003343448
## 309 9 4 0.003606675
## 310 10 4 0.003507884
## 311 11 4 0.002898068
## 312 12 4 0.003083802
## 313 13 4 0.003718418
## 314 14 4 0.002763495
## 315 15 4 0.003088338
## 316 16 4 0.003487105
## 317 17 4 0.002644791
## 318 18 4 0.002798770
## 319 19 4 0.002673181
## 320 20 4 0.003172974
## 321 21 4 0.002742815
## 322 22 4 0.002913404
## 323 23 4 0.002859349
## 324 24 4 0.002947876
## 325 25 4 0.002696052
## 326 26 4 0.002679122
## 327 27 4 0.003006277
## 328 28 4 0.002975636
## 329 29 4 0.002665067
## 330 30 4 0.002839643
## 331 31 4 0.002837803
## 332 32 4 0.002666064
## 333 33 4 0.002641424
## 334 34 4 0.002726495
## 335 35 4 0.003056776
## 336 36 4 0.002621917
## 337 37 4 0.002771835
## 338 38 4 0.003328291
## 339 39 4 0.002740097
## 340 40 4 0.002620992
## 341 41 4 0.003000842
## 342 42 4 0.002819788
## 343 43 4 0.003533963
## 344 44 4 0.002795333
## 345 45 4 0.002757198
## 346 46 4 0.002839389
## 347 47 4 0.002838302
## 348 48 4 0.003294939
## 349 49 4 0.003597160
## 350 50 4 0.002840022
## 351 51 4 0.003496745
## 352 52 4 0.002670635
## 353 53 4 0.002911839
## 354 54 4 0.003393294
## 355 55 4 0.002652747
## 356 56 4 0.002800906
## 357 57 4 0.002836812
## 358 58 4 0.003562442
## 359 59 4 0.003407879
## 360 60 4 0.002753160
## 361 61 4 0.002595412
## 362 62 4 0.003498198
## 363 63 4 0.003018693
## 364 64 4 0.003247601
## 365 65 4 0.003246640
## 366 66 4 0.003143784
## 367 67 4 0.002851555
## 368 68 4 0.003406754
## 369 69 4 0.002756205
## 370 70 4 0.002790451
## 371 71 4 0.003356577
## 372 72 4 0.002696757
## 373 73 4 0.002748391
## 374 74 4 0.002657489
## 375 75 4 0.003284028
## 376 76 4 0.003311805
## 377 77 4 0.002780885
## 378 78 4 0.003265715
## 379 79 4 0.003100225
## 380 80 4 0.002676644
## 381 81 4 0.002734583
## 382 82 4 0.002718548
## 383 83 4 0.002737317
## 384 84 4 0.003107693
## 385 85 4 0.002776348
## 386 86 4 0.002694656
## 387 87 4 0.003266339
## 388 88 4 0.002951356
## 389 89 4 0.003240187
## 390 90 4 0.003606639
## 391 91 4 0.002876276
## 392 92 4 0.002713966
## 393 93 4 0.002753725
## 394 94 4 0.003497540
## 395 95 4 0.002667266
## 396 96 4 0.002967706
## 397 97 4 0.003459355
## 398 98 4 0.002875183
## 399 99 4 0.002643288
## 400 100 4 0.003239758
library("ggplot2")
ggplot(dfstr, aes(y = value, x = dimensions, group = dimensions)) +
geom_boxplot()

## -----------------------------------------------------------------------------
nmdsk2 = metaMDS(disekm, k = 2, autotransform = FALSE)
## Run 0 stress 0.02310251
## Run 1 stress 0.02310251
## ... New best solution
## ... Procrustes: rmse 2.261024e-06 max resid 4.035376e-06
## ... Similar to previous best
## Run 2 stress 0.02310251
## ... Procrustes: rmse 2.288581e-06 max resid 4.010604e-06
## ... Similar to previous best
## Run 3 stress 0.02310251
## ... New best solution
## ... Procrustes: rmse 8.855765e-07 max resid 2.357504e-06
## ... Similar to previous best
## Run 4 stress 0.02310251
## ... Procrustes: rmse 1.676326e-06 max resid 2.74728e-06
## ... Similar to previous best
## Run 5 stress 0.02310251
## ... Procrustes: rmse 1.433153e-06 max resid 2.447657e-06
## ... Similar to previous best
## Run 6 stress 0.02310251
## ... Procrustes: rmse 3.523568e-06 max resid 6.102494e-06
## ... Similar to previous best
## Run 7 stress 0.02310251
## ... Procrustes: rmse 3.330735e-06 max resid 5.309519e-06
## ... Similar to previous best
## Run 8 stress 0.02310251
## ... Procrustes: rmse 2.275031e-06 max resid 3.567351e-06
## ... Similar to previous best
## Run 9 stress 0.02310251
## ... New best solution
## ... Procrustes: rmse 1.042435e-06 max resid 1.695406e-06
## ... Similar to previous best
## Run 10 stress 0.02310251
## ... Procrustes: rmse 1.989542e-06 max resid 3.352387e-06
## ... Similar to previous best
## Run 11 stress 0.02310251
## ... Procrustes: rmse 1.114155e-06 max resid 1.686595e-06
## ... Similar to previous best
## Run 12 stress 0.02310251
## ... Procrustes: rmse 1.028951e-06 max resid 1.76672e-06
## ... Similar to previous best
## Run 13 stress 0.02310251
## ... Procrustes: rmse 2.026597e-06 max resid 3.485932e-06
## ... Similar to previous best
## Run 14 stress 0.02310251
## ... Procrustes: rmse 2.134079e-06 max resid 3.604351e-06
## ... Similar to previous best
## Run 15 stress 0.02310251
## ... Procrustes: rmse 2.240369e-06 max resid 3.664711e-06
## ... Similar to previous best
## Run 16 stress 0.02310251
## ... Procrustes: rmse 1.980293e-06 max resid 3.736667e-06
## ... Similar to previous best
## Run 17 stress 0.02310251
## ... Procrustes: rmse 1.484197e-06 max resid 2.537041e-06
## ... Similar to previous best
## Run 18 stress 0.02310251
## ... Procrustes: rmse 1.714337e-06 max resid 2.964587e-06
## ... Similar to previous best
## Run 19 stress 0.02310251
## ... Procrustes: rmse 5.208182e-06 max resid 8.464009e-06
## ... Similar to previous best
## Run 20 stress 0.02310251
## ... Procrustes: rmse 2.465314e-06 max resid 4.204134e-06
## ... Similar to previous best
## *** Solution reached
nmdsk2
##
## Call:
## metaMDS(comm = disekm, k = 2, autotransform = FALSE)
##
## global Multidimensional Scaling using monoMDS
##
## Data: disekm
## Distance: user supplied
##
## Dimensions: 2
## Stress: 0.02310251
## Stress type 1, weak ties
## Two convergent solutions found after 20 tries
## Scaling: centring, PC rotation
## Species: scores missing
nmdsk2$points
## MDS1 MDS2
## w434 -0.1531437 -0.43529207
## w445 -0.1778139 -0.39469836
## w465 -0.4343192 -0.27291116
## w472 -0.4656931 -0.24265893
## w490 -0.4767777 0.06396620
## w504 -0.3992916 0.31370056
## w537 -0.2638100 0.41606649
## w555 -0.1493467 0.47154465
## w584 0.2711683 0.35511499
## w600 0.4003808 0.18752334
## w610 0.4948220 0.03556897
## w628 0.4855930 -0.10962294
## w651 0.4411678 -0.16687544
## w674 0.4270639 -0.22142630
## attr(,"centre")
## [1] TRUE
## attr(,"pc")
## [1] TRUE
## attr(,"halfchange")
## [1] FALSE
## attr(,"internalscaling")
## [1] 2.07271
stressplot(nmdsk2, pch = 20)

## -----------------------------------------------------------------------------
ggplot(dfekm, aes(x = MDS1, y = MDS2)) +
geom_point(col = dfekm$rgb, size = 4) +
geom_text_repel(aes(label = name)) + coord_fixed()

nmdsk2$points[, 1:2] |>
`colnames<-`(paste0("NmMDS", 1:2)) |>
as_tibble() |>
bind_cols(dplyr::select(dfekm, rgb, name)) |>
ggplot(aes(x = NmMDS1, y = NmMDS2)) +
geom_point(col = dfekm$rgb, size = 4) +
geom_text_repel(aes(label = name))

## -----------------------------------------------------------------------------
IBDchip = readRDS("../data/vsn28Exprd.rds")
library("ade4")
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("sva")
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-39. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
## -----------------------------------------------------------------------------
class(IBDchip)
## [1] "matrix" "array"
dim(IBDchip)
## [1] 8635 28
tail(IBDchip[,1:3])
## 20CF 20DF 20MF
## bm-026.1.sig_st 7.299308 7.275802 7.383103
## bm-125.1.sig_st 8.538857 8.998562 9.296096
## bru.tab.d.HIII.Con32.sig_st 6.802736 6.777566 6.859950
## bru.tab.d.HIII.Con323.sig_st 6.463604 6.501139 6.611851
## bru.tab.d.HIII.Con5.sig_st 5.739235 5.666060 5.831079
## day 2.000000 2.000000 2.000000
table(IBDchip[nrow(IBDchip), ])
##
## 1 2 3
## 8 16 4
## -----------------------------------------------------------------------------
assayIBD = IBDchip[-nrow(IBDchip), ]
day = factor(IBDchip[nrow(IBDchip), ])
day
## 20CF 20DF 20MF 20PF CC CD
## 2 2 2 2 1 1
## CM CNTR23CF CNTR23DF CNTR23MF CNTR23PF CNTRCF
## 1 2 2 2 2 2
## CNTRDF CNTRMF CNTRPF CP IBS1_CntrCb IBS1_CntrPb
## 2 2 2 1 3 3
## IBS1_IBSCb IBS1_IBSPb IBSC IBSD IBSM IBSP
## 3 3 2 2 2 2
## IC ID IM IP
## 1 1 1 1
## Levels: 1 2 3
## -----------------------------------------------------------------------------
rankthreshPCA = function(x, threshold = 3000) {
ranksM = apply(x, 2, rank)
ranksM[ranksM < threshold] = threshold
ranksM = threshold - ranksM
dudi.pca(t(ranksM), scannf = FALSE, nf = 2)
}
pcaDay12 = rankthreshPCA(assayIBD[, day != 3])
pcaDay12
## Duality diagramm
## class: pca dudi
## $call: dudi.pca(df = t(ranksM), scannf = FALSE, nf = 2)
##
## $nf: 2 axis-components saved
## $rank: 23
## eigen values: 1401 1033 586 506.7 378.2 ...
## vector length mode content
## 1 $cw 8634 numeric column weights
## 2 $lw 24 numeric row weights
## 3 $eig 23 numeric eigen values
##
## data.frame nrow ncol content
## 1 $tab 24 8634 modified array
## 2 $li 24 2 row coordinates
## 3 $l1 24 2 row normed scores
## 4 $co 8634 2 column coordinates
## 5 $c1 8634 2 column normed scores
## other elements: cent norm
fviz_eig(pcaDay12, bar_width = 0.6) + ggtitle("")

## -----------------------------------------------------------------------------
day12 = day[ day!=3 ]
rtPCA1 = fviz(pcaDay12, element = "ind", axes = c(1, 2), geom = c("point", "text"),
habillage = day12, repel = TRUE, palette = "Dark2",
addEllipses = TRUE, ellipse.type = "convex") + ggtitle("") +
coord_fixed()
rtPCA1

## -----------------------------------------------------------------------------
pcaDay123 = rankthreshPCA(assayIBD)
fviz(pcaDay123, element = "ind", axes = c(1, 2), geom = c("point", "text"),
habillage = day, repel = TRUE, palette = "Dark2",
addEllipses = TRUE, ellipse.type = "convex") +
ggtitle("") + coord_fixed()

## -----------------------------------------------------------------------------
rtPCA1

fviz(pcaDay123, element="ind", axes=c(1,2), geom=c("point","text"),
habillage = day, repel=TRUE, palette = "Dark2",
addEllipses = TRUE, ellipse.type = "convex") + ggtitle("") +
coord_fixed()

## -----------------------------------------------------------------------------
fviz_pca_ind(pcaDay123, habillage = day, labelsize = 3,
palette = "Dark2", addEllipses = TRUE, ellipse.level = 0.69)

## -----------------------------------------------------------------------------
fviz_eig(pcaDay123, bar_width=0.6) + ggtitle("")

model0 = model.matrix(~1, day)
model0
## (Intercept)
## 20CF 1
## 20DF 1
## 20MF 1
## 20PF 1
## CC 1
## CD 1
## CM 1
## CNTR23CF 1
## CNTR23DF 1
## CNTR23MF 1
## CNTR23PF 1
## CNTRCF 1
## CNTRDF 1
## CNTRMF 1
## CNTRPF 1
## CP 1
## IBS1_CntrCb 1
## IBS1_CntrPb 1
## IBS1_IBSCb 1
## IBS1_IBSPb 1
## IBSC 1
## IBSD 1
## IBSM 1
## IBSP 1
## IC 1
## ID 1
## IM 1
## IP 1
## attr(,"assign")
## [1] 0
## -----------------------------------------------------------------------------
combatIBD = ComBat(dat = assayIBD, batch = day, mod = model0)
## Found3batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
pcaDayBatRM = rankthreshPCA(combatIBD)
fviz(pcaDayBatRM, element = "ind", geom = c("point", "text"),
habillage = day, repel=TRUE, palette = "Dark2", addEllipses = TRUE,
ellipse.type = "convex", axes =c(1,2)) + coord_fixed() + ggtitle("")

library("SummarizedExperiment")
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:genefilter':
##
## rowSds, rowVars
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following objects are masked from 'package:genefilter':
##
## rowSds, rowVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:ade4':
##
## score
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
colnames(IBDchip)
## [1] "20CF" "20DF" "20MF" "20PF" "CC"
## [6] "CD" "CM" "CNTR23CF" "CNTR23DF" "CNTR23MF"
## [11] "CNTR23PF" "CNTRCF" "CNTRDF" "CNTRMF" "CNTRPF"
## [16] "CP" "IBS1_CntrCb" "IBS1_CntrPb" "IBS1_IBSCb" "IBS1_IBSPb"
## [21] "IBSC" "IBSD" "IBSM" "IBSP" "IC"
## [26] "ID" "IM" "IP"
treatment = factor(ifelse(grepl("Cntr|^C", colnames(IBDchip)), "CTL", "IBS"))
treatment
## [1] IBS IBS IBS IBS CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL CTL IBS
## [20] IBS IBS IBS IBS IBS IBS IBS IBS IBS
## Levels: CTL IBS
## -----------------------------------------------------------------------------
sampledata = DataFrame(day = day, treatment = treatment)
chipse = SummarizedExperiment(assays = list(abundance = assayIBD),
colData = sampledata)
## -----------------------------------------------------------------------------
# check whether the 'grep'ed treatment status agree with
# a manual 'hardcoded' version provided by Susan previously.
stopifnot(identical(chipse$treatment, factor(c("IBS", "CTL")[
c(1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1)])))
## -----------------------------------------------------------------------------
chipse[, day == 2]
## class: SummarizedExperiment
## dim: 8634 16
## metadata(0):
## assays(1): abundance
## rownames(8634): 01010101000000.2104_gPM_GC 01010101000000.2141_gPM_GC
## ... bru.tab.d.HIII.Con323.sig_st bru.tab.d.HIII.Con5.sig_st
## rowData names(0):
## colnames(16): 20CF 20DF ... IBSM IBSP
## colData names(2): day treatment
## -----------------------------------------------------------------------------
corese = readRDS("../data/normse.rds")
corese
## class: SummarizedExperiment
## dim: 1000 747
## metadata(0):
## assays(3): counts normalizedValues residuals
## rownames(1000): Cbr2 Cyp2f2 ... Rnf13 Atp7b
## rowData names(0):
## colnames(747): OEP01_N706_S501 OEP01_N701_S501 ... OEL23_N704_S503
## OEL23_N703_S502
## colData names(69): Experiment Batch ... W49 W50
assays(corese)
## List of length 3
## names(3): counts normalizedValues residuals
names(assays(corese))
## [1] "counts" "normalizedValues" "residuals"
dim(assays(corese)$counts)
## [1] 1000 747
dim(assays(corese)$normalizedValues)
## [1] 1000 747
dim(assays(corese)$residuals)
## [1] 1000 747
head(colData(corese))
## DataFrame with 6 rows and 69 columns
## Experiment Batch publishedClusters NREADS NALIGNED
## <factor> <factor> <numeric> <numeric> <numeric>
## OEP01_N706_S501 K5ERRY_UI_96HPT Y01 1 3313260 3167600
## OEP01_N701_S501 K5ERRY_UI_96HPT Y01 1 2902430 2757790
## OEP01_N707_S507 K5ERRY_UI_96HPT Y01 1 2307940 2178350
## OEP01_N705_S501 K5ERRY_UI_96HPT Y01 1 3337400 3183720
## OEP01_N709_S501 K5ERRY_UI_96HPT Y01 -2 3502620 3352230
## OEP01_N702_S505 K5ERRY_UI_96HPT Y01 1 2984830 2844350
## RALIGN TOTAL_DUP PRIMER PCT_RIBOSOMAL_BASES
## <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 95.6035 47.9943 0.0154566 2e-06
## OEP01_N701_S501 95.0167 45.0150 0.0182066 0e+00
## OEP01_N707_S507 94.3852 43.7832 0.0219196 0e+00
## OEP01_N705_S501 95.3952 43.2688 0.0183041 2e-06
## OEP01_N709_S501 95.7065 54.6632 0.0165818 1e-06
## OEP01_N702_S505 95.2935 66.3881 0.0190047 0e+00
## PCT_CODING_BASES PCT_UTR_BASES PCT_INTRONIC_BASES
## <numeric> <numeric> <numeric>
## OEP01_N706_S501 0.200130 0.230654 0.404205
## OEP01_N701_S501 0.182461 0.201810 0.465702
## OEP01_N707_S507 0.152627 0.207897 0.511416
## OEP01_N705_S501 0.169514 0.207342 0.457556
## OEP01_N709_S501 0.310654 0.268904 0.283935
## OEP01_N702_S505 0.305311 0.283584 0.211970
## PCT_INTERGENIC_BASES PCT_MRNA_BASES MEDIAN_CV_COVERAGE
## <numeric> <numeric> <numeric>
## OEP01_N706_S501 0.165009 0.430784 0.843857
## OEP01_N701_S501 0.150027 0.384271 0.914370
## OEP01_N707_S507 0.128060 0.360524 0.955405
## OEP01_N705_S501 0.165586 0.376856 0.816630
## OEP01_N709_S501 0.136506 0.579558 0.715861
## OEP01_N702_S505 0.199135 0.588895 1.011250
## MEDIAN_5PRIME_BIAS MEDIAN_3PRIME_BIAS CreER ERCC_reads
## <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 0.061028 0.521079 1 10516
## OEP01_N701_S501 0.033350 0.373993 3022 9331
## OEP01_N707_S507 0.014606 0.491230 2329 7386
## OEP01_N705_S501 0.101798 0.525238 717 6387
## OEP01_N709_S501 0.136549 0.497042 2 8362
## OEP01_N702_S505 0.013452 0.453206 7007 33321
## W1 W2 W3 W4 W5 W6
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 0.520220 0.7111960 -0.964288 0.736816 -0.2326359 0.2107019
## OEP01_N701_S501 0.426466 0.1297278 -0.555788 0.245425 0.0141273 -0.0471313
## OEP01_N707_S507 0.702733 0.2162028 -0.376394 0.466649 -0.3101722 0.0305582
## OEP01_N705_S501 0.578357 -0.0396907 -0.357542 0.426910 0.2361442 -0.0049643
## OEP01_N709_S501 -0.622039 -0.3156792 0.530188 1.069105 -1.9034926 0.6185733
## OEP01_N702_S505 0.417916 1.1798491 -0.652745 1.667191 -3.0199206 1.9204409
## W7 W8 W9 W10 W11
## <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 0.35886105 0.0107942 -0.278665 0.2682341 -0.310914
## OEP01_N701_S501 -0.01789151 0.0146882 -0.374432 -0.0954823 -0.322615
## OEP01_N707_S507 -0.12752002 -0.5293184 -0.749898 -0.4576471 -0.437090
## OEP01_N705_S501 0.00454701 -0.1874755 -0.216097 -0.1534220 -0.275354
## OEP01_N709_S501 -1.18603972 -0.1784294 -1.138336 0.2595522 0.167002
## OEP01_N702_S505 0.01361619 1.0272006 2.622282 -1.8732545 1.202000
## W12 W13 W14 W15 W16
## <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 -0.0972732 0.00386093 0.5995104 -0.00847401 0.173571
## OEP01_N701_S501 0.0220218 -0.00131921 0.2645274 0.11864449 -0.148296
## OEP01_N707_S507 -0.2815862 0.05257196 0.2150666 0.33405161 0.231073
## OEP01_N705_S501 0.1305329 -0.30296558 -0.1121428 0.45152923 -0.582003
## OEP01_N709_S501 -0.2005908 -0.46462971 -0.0845307 0.15624995 -0.275188
## OEP01_N702_S505 -2.2273784 3.69341246 2.0219742 -2.37901395 -1.742345
## W17 W18 W19 W20 W21
## <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 0.0548535 -0.1185615 -0.170439 -0.177957 -0.02763525
## OEP01_N701_S501 -0.2170747 -0.0613756 0.136067 -0.608353 -0.15161779
## OEP01_N707_S507 -0.1143111 0.7471833 0.145397 -0.977857 -0.27504652
## OEP01_N705_S501 0.1636372 0.0792015 0.163124 0.245431 -0.00828173
## OEP01_N709_S501 -0.8483263 0.6533705 -0.161370 -0.202132 1.50893593
## OEP01_N702_S505 -2.7199021 -2.7202103 -1.917451 1.608239 -2.48233730
## W22 W23 W24 W25 W26
## <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 -0.1101543 -0.0452483 -0.2466602 0.1410014 0.1925462
## OEP01_N701_S501 -0.3549235 -0.0582717 -0.1050211 -0.0587576 -0.0264406
## OEP01_N707_S507 -0.4399909 -0.2052275 0.0898161 0.0378414 0.0131375
## OEP01_N705_S501 -0.0693465 -0.0480346 0.2798822 -0.0222410 -0.2152044
## OEP01_N709_S501 -0.8484122 -0.3315167 -0.9691109 0.4486845 -1.5099861
## OEP01_N702_S505 1.2620105 0.1750976 0.8911051 0.0748229 1.9686151
## W27 W28 W29 W30 W31
## <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 -0.0714981 -0.0792524 0.224363 0.14884034 -0.176882
## OEP01_N701_S501 -0.4952902 -0.0893022 0.230802 0.60100328 -0.204547
## OEP01_N707_S507 -0.1087171 -0.3183907 0.081320 -0.32288254 -0.245785
## OEP01_N705_S501 -0.2601506 0.3927593 0.049813 0.23798699 -0.283526
## OEP01_N709_S501 -0.5750850 -0.4881507 -0.984324 0.94240028 -0.340385
## OEP01_N702_S505 0.7140347 -0.8917797 1.172403 0.00813556 0.766339
## W32 W33 W34 W35 W36
## <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 0.1107873 -0.242199 -0.1128412 0.0486555 0.0406623
## OEP01_N701_S501 0.0295410 0.408077 -0.0209646 0.2154537 -0.2847428
## OEP01_N707_S507 -0.0559962 0.132861 0.4553084 0.4884976 -0.2712777
## OEP01_N705_S501 -0.3440416 0.280244 -0.0112784 0.0902062 -0.3242816
## OEP01_N709_S501 0.2662828 -0.345783 0.0678599 0.2062731 0.2002757
## OEP01_N702_S505 0.6875437 1.189379 0.4218973 0.9876865 0.2397630
## W37 W38 W39 W40 W41 W42
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 -0.34664087 0.296591 -0.397047 0.660618 0.151844 -0.4104539
## OEP01_N701_S501 -0.31658029 0.112225 0.204178 0.161759 0.110611 -0.2328593
## OEP01_N707_S507 0.05948657 0.236327 0.249922 -0.271585 0.614970 0.2856108
## OEP01_N705_S501 -0.00017546 -0.454404 0.112083 -0.142695 0.267346 0.0835569
## OEP01_N709_S501 0.62394067 -0.676848 1.391421 0.248249 -0.764525 -2.0354537
## OEP01_N702_S505 -0.17287513 -0.640165 0.843831 0.134415 -0.190887 0.0351919
## W43 W44 W45 W46 W47 W48
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## OEP01_N706_S501 -0.2994574 -0.1771022 0.0199596 -0.1850723 -0.287603 0.116291
## OEP01_N701_S501 0.0124421 -0.0730977 0.1268364 -0.4021831 -0.303506 -0.225725
## OEP01_N707_S507 -0.2248810 -0.2901847 -0.2951189 -0.2187010 -0.089894 -0.185862
## OEP01_N705_S501 -0.0848938 0.5588053 0.2413573 0.3476736 0.106708 -0.223877
## OEP01_N709_S501 -0.3648261 -0.2004645 0.1244308 0.0406688 1.491178 0.510538
## OEP01_N702_S505 -0.4896780 0.2840110 0.2488074 0.8956302 0.837638 0.574261
## W49 W50
## <numeric> <numeric>
## OEP01_N706_S501 -0.1379595 0.311405
## OEP01_N701_S501 -0.5307374 0.110388
## OEP01_N707_S507 -0.4666151 -0.163571
## OEP01_N705_S501 -0.0464004 -0.356657
## OEP01_N709_S501 -0.4225694 0.601275
## OEP01_N702_S505 -0.0437345 0.343162
norm = assays(corese)$normalizedValues
## -----------------------------------------------------------------------------
length(unique(colData(corese)$Batch))
## [1] 18
dim(norm) #1000 genes 747 samples
## [1] 1000 747
rownames(norm)[1:10]
## [1] "Cbr2" "Cyp2f2" "Gstm1" "Sec14l3" "Cyp2g1" "Sox11" "Ebf1"
## [8] "Clu" "Fmo6" "Ugt2a1"
## -----------------------------------------------------------------------------
respca = dudi.pca(t(norm), nf = 3, scannf = FALSE) # dudi.pca use samples as rows to cluster rows
plotscree(respca, 15)

PCS = respca$li[, 1:3]
## -----------------------------------------------------------------------------
library("RColorBrewer")
publishedClusters = colData(corese)[, "publishedClusters"]
batch = colData(corese)$Batch
col_clus = c(
"transparent", "#1B9E77", "antiquewhite2", "cyan",
"#E7298A", "#A6CEE3", "#666666", "#E6AB02", "#FFED6F",
"darkorchid2", "#B3DE69", "#FF7F00", "#A6761D", "#1F78B4")
names(col_clus) = sort(unique(publishedClusters))
## -----------------------------------------------------------------------------
library("rgl")
batch = colData(corese)$Batch
plot3d(PCS,aspect=sqrt(c(84,24,20)),col=col_clus[batch])
plot3d(PCS,aspect=sqrt(c(84,24,20)),
col = col_clus[as.character(publishedClusters)])
## -----------------------------------------------------------------------------
HIV <- data.frame(
Patient = c('AHX112', 'AHX717', 'AHX543'),
Mut1 = c(0, 1, 1),
Mut2 = c(0, 0, 0),
Mut3 = c(0, 1, 0),
'...' = rep(' ', 3))
knitr::kable(HIV, format = 'html')
|
Patient
|
Mut1
|
Mut2
|
Mut3
|
…
|
|
AHX112
|
0
|
0
|
0
|
|
|
AHX717
|
1
|
0
|
1
|
|
|
AHX543
|
1
|
0
|
0
|
|
## -----------------------------------------------------------------------------
crossHIV <- data.frame(
Patient = c('Mut1', 'Mut2', 'Mut3'),
Mut1 = c(853, 29, 10),
Mut2 = c(29, 853, 52),
Mut3 = c(10, 52, 853),
'...' = rep(' ', 3))
knitr::kable(crossHIV, format = 'html')
|
Patient
|
Mut1
|
Mut2
|
Mut3
|
…
|
|
Mut1
|
853
|
29
|
10
|
|
|
Mut2
|
29
|
853
|
52
|
|
|
Mut3
|
10
|
52
|
853
|
|
## -----------------------------------------------------------------------------
cooc = read.delim2("../data/coccurHIV.txt", header = TRUE, sep = ",")
cooc[1:4, 1:11]
## X4S X6D X6K X11R X20R X21I X35I X35L X35M X35T X39A
## 4S 0 28 8 0 99 0 22 5 15 3 45
## 6D 26 0 0 34 131 0 108 4 30 13 84
## 6K 7 0 0 6 45 0 5 13 38 35 12
## 11R 0 35 7 0 127 12 60 17 15 6 42
HIVca = dudi.coa(cooc, nf = 4, scannf = FALSE)
fviz_eig(HIVca, geom = "bar", bar_width = 0.6) + ggtitle("")

## -----------------------------------------------------------------------------
library("rgl")
CA1=HIVca$li[,1];CA2=HIVca$li[,2];CA3=HIVca$li[,3]
plot3d(CA1,CA2,CA3,aspect=FALSE,col="purple")
## -----------------------------------------------------------------------------
fviz_ca_row(HIVca,axes = c(1, 2),geom="text", col.row="purple",
labelsize=3)+ggtitle("") + xlim(-0.55, 1.7) + ylim(-0.53,1.1) +
theme_bw() + coord_fixed()

fviz_ca_row(HIVca,axes = c(1, 3), geom="text",col.row="purple",
labelsize=3)+ggtitle("")+ xlim(-0.55, 1.7)+ylim(-0.5,0.6) +
theme_bw() + coord_fixed()

## -----------------------------------------------------------------------------
fviz_ca_row(HIVca, axes=c(1, 3), geom="text", col.row="purple", labelsize=3) +
ggtitle("") + theme_minimal() + coord_fixed()

## -----------------------------------------------------------------------------
HairEyeColor
## , , Sex = Male
##
## Eye
## Hair Brown Blue Hazel Green
## Black 32 11 10 3
## Brown 53 50 25 15
## Red 10 10 7 7
## Blond 3 30 5 8
##
## , , Sex = Female
##
## Eye
## Hair Brown Blue Hazel Green
## Black 36 9 5 2
## Brown 66 34 29 14
## Red 16 7 7 7
## Blond 4 64 5 8
HairColor = HairEyeColor[,,2]
HairColor
## Eye
## Hair Brown Blue Hazel Green
## Black 36 9 5 2
## Brown 66 34 29 14
## Red 16 7 7 7
## Blond 4 64 5 8
chisq.test(HairColor)
## Warning in chisq.test(HairColor): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: HairColor
## X-squared = 106.66, df = 9, p-value < 2.2e-16
## -----------------------------------------------------------------------------
knitr::kable(HairColor, format = 'html')
|
|
Brown
|
Blue
|
Hazel
|
Green
|
|
Black
|
36
|
9
|
5
|
2
|
|
Brown
|
66
|
34
|
29
|
14
|
|
Red
|
16
|
7
|
7
|
7
|
|
Blond
|
4
|
64
|
5
|
8
|
## -----------------------------------------------------------------------------
rowsums = as.matrix(apply(HairColor, 1, sum))
rowsums
## [,1]
## Black 52
## Brown 143
## Red 37
## Blond 81
colsums = as.matrix(apply(HairColor, 2, sum))
t(colsums)
## Brown Blue Hazel Green
## [1,] 122 114 46 31
HCexp = rowsums %*%t (colsums) / sum(colsums)
## -----------------------------------------------------------------------------
mosaicplot(HCexp, shade=TRUE, las=1, type="pearson", cex.axis=0.7, main="")

## -----------------------------------------------------------------------------
sum((HairColor - HCexp)^2/HCexp)
## [1] 106.6637
## -----------------------------------------------------------------------------
round(t(HairColor-HCexp))
## Hair
## Eye Black Brown Red Blond
## Brown 16 10 2 -28
## Blue -10 -18 -6 34
## Hazel -3 8 2 -7
## Green -3 0 3 0
library("vcd")
## Loading required package: grid
##
## Attaching package: 'vcd'
## The following object is masked from 'package:GenomicRanges':
##
## tile
## The following object is masked from 'package:IRanges':
##
## tile
mosaicplot(HairColor, shade=TRUE, las=1, type="pearson", cex.axis=0.7, main="")

## -----------------------------------------------------------------------------
HC = as.data.frame.matrix(HairColor)
coaHC = dudi.coa(HC,scannf=FALSE,nf=2)
round(coaHC$eig[1:3]/sum(coaHC$eig)*100)
## [1] 89 10 2
fviz_ca_biplot(coaHC, repel=TRUE, col.col="brown", col.row="purple") +
ggtitle("") + ylim(c(-0.5,0.5))

## -----------------------------------------------------------------------------
library("vegan")
res.ca = vegan::cca(HairColor)
plot(res.ca, scaling=3)

## -----------------------------------------------------------------------------
load("../data/lakes.RData")
dim(lakelike)
## [1] 10 15
lakelike[1:3,1:8]
## plant1 plant2 plant3 plant4 plant5 plant6 plant7 plant8
## loc1 6 4 0 3 0 0 0 0
## loc2 4 5 5 3 4 2 0 0
## loc3 3 4 7 4 5 2 1 1
reslake=dudi.coa(lakelike,scannf=FALSE,nf=2) # dudi.coa: correspondence analysis among the rows
reslake
## Duality diagramm
## class: coa dudi
## $call: dudi.coa(df = lakelike, scannf = FALSE, nf = 2)
##
## $nf: 2 axis-components saved
## $rank: 9
## eigen values: 0.369 0.1622 0.05687 0.0222 0.01854 ...
## vector length mode content
## 1 $cw 15 numeric column weights
## 2 $lw 10 numeric row weights
## 3 $eig 9 numeric eigen values
##
## data.frame nrow ncol content
## 1 $tab 10 15 modified array
## 2 $li 10 2 row coordinates
## 3 $l1 10 2 row normed scores
## 4 $co 15 2 column coordinates
## 5 $c1 15 2 column normed scores
## other elements: N
length(reslake$eig)
## [1] 9
round(reslake$eig[1:8]/sum(reslake$eig),2)
## [1] 0.56 0.25 0.09 0.03 0.03 0.02 0.01 0.00
## -----------------------------------------------------------------------------
fviz_ca_row(reslake,repel=TRUE)+ggtitle("")+ylim(c(-0.55,1.7))

fviz_ca_biplot(reslake,repel=TRUE)+ggtitle("")+ylim(c(-0.55,1.7))

## -----------------------------------------------------------------------------
## # Provenance tracking, keep this for the record:
## Nor = read.csv("../data/nbt.3154-S3.csv",row.names=1)
## dim(Nor)
## blom = as.matrix(Nor)
## desc1=unlist(strsplit(rownames(blom),"_"))
## desc=desc1[seq(1,7867,2)]
## gr4sfg=which(substr(rownames(blom),1,5)=="4SFGA")
## gr4sf=which(substr(rownames(blom),1,4)=="4SGA")
## gr1=which(substr(rownames(blom),1,2)=="PS")
## gr2=which(substr(rownames(blom),1,2)=="NP")
## gr3=which(substr(rownames(blom),1,2)=="HF")
## colscells=c("blue","green","orange","red","purple")
## colnb=rep(0,3934)
## colnb[gr1]=1
## colnb[gr2]=2
## colnb[gr3]=3
## colnb[gr4sf]=4
## colnb[gr4sfg]=5
## typesort=rep(0,3934)
## typesort[ nchar(desc) < 5 & substr(rownames(blom), 3, 3) == "A"] = "sortA"
## typesort[ nchar(desc) < 5 & substr(rownames(blom), 3, 3) == "B"] = "sortB"
## typesort[ nchar(desc) >= 5 ] = "sortA"
## ftable(typesort)
## celltypes=as.factor(c("PS","NP","HF","4SG","4SGF-")[colnb])
## cellcol = colscells[colnb]
## colCells = DataFrame(celltypes=celltypes, cellcol=colscells[colnb])
## Moignard= SummarizedExperiment(assays=list(assayCells = blom), rowData=colCells)
## saveRDS(Moignard,file="../data/Moignard.rds")
## -----------------------------------------------------------------------------
Moignard = readRDS("../data/Moignard.rds")
Moignard
## class: SummarizedExperiment
## dim: 3934 46
## metadata(0):
## assays(1): assayCells
## rownames(3934): HFA1_001 HFA1_002 ... 4SFGA6_246 4SFGA6_247
## rowData names(2): celltypes cellcol
## colnames(46): Cbfa2t3h Cdh1 ... Tbx3 Ubc
## colData names(0):
cellt = rowData(Moignard)$celltypes
cellt[1:10]
## [1] HF HF HF HF HF HF HF HF HF HF
## Levels: 4SG 4SGF- HF NP PS
table(cellt) # five populations from between embryonic days E7.0 and E8.25.
## cellt
## 4SG 4SGF- HF NP PS
## 983 770 1005 552 624
colsn = c("red", "purple", "orange", "green", "blue")
blom = assay(Moignard)
dim(blom) #3934 cells and 46 genes
## [1] 3934 46
blom[1,]
## Cbfa2t3h Cdh1 Cdh5 Egfl7 Eif2b1 Erg
## -2.59885576 -14.00000000 1.81510127 -0.09616803 -0.10803126 -4.03859313
## Ets1 Ets2 Etv2 Etv6 Fli1 FoxH1
## -2.83606668 1.13154874 -0.67196310 -1.63685031 3.42368180 0.39495055
## FoxO4 Gata1 Gfi1 Gfi1b HbbbH1 Hhex
## -2.37693628 -4.92417205 -14.00000000 -14.00000000 -4.06128704 -2.47586930
## HoxB2 HoxB4 HoxD8 Ikaros Itga2b Kdr
## -14.00000000 -4.03842468 -14.00000000 -4.25957146 -4.77724528 1.04701024
## Kit Ldb1 Lmo2 Lyl1 Mecom Meis1
## -3.01044681 -3.48850936 -3.05008882 -0.40674610 -5.12874538 -1.92857888
## Mitf Mrpl19 Myb Nfe2 Notch1 Pecam1
## -7.27555173 -2.09703105 -14.00000000 -14.00000000 -5.00328033 -0.04681895
## Polr2a Procr Runx1 Sfpi1 Sox17 Sox7
## -0.86309075 -2.22256484 -3.47034809 -4.47841126 -3.14228012 -0.41408476
## Tal1 Tbx20 Tbx3 Ubc
## 0.14474502 -8.10209754 -2.47731723 3.06815305
dist2n.euclid = dist(blom)
length(dist2n.euclid)
## [1] 7736211
dim(as.matrix(dist2n.euclid))
## [1] 3934 3934
(3934*3934-3934)/2
## [1] 7736211
dist1n.l1 = dist(blom, "manhattan")
## -----------------------------------------------------------------------------
ce1Mds = cmdscale(dist1n.l1, k = 20, eig = TRUE)
ce2Mds = cmdscale(dist2n.euclid, k = 20, eig = TRUE)
perc1 = round(100*sum(ce1Mds$eig[1:2])/sum(ce1Mds$eig))
perc2 = round(100*sum(ce2Mds$eig[1:2])/sum(ce2Mds$eig))
## -----------------------------------------------------------------------------
plotscree(ce1Mds, m = 4)

plotscree(ce2Mds, m = 4)

## -----------------------------------------------------------------------------
c1mds = ce1Mds$points[, 1:2] |>
`colnames<-`(paste0("L1_PCo", 1:2)) |>
as_tibble()
ggplot(c1mds, aes(x = L1_PCo1, y = L1_PCo2, color = cellt)) +
geom_point(aes(color = cellt), alpha = 0.6) +
scale_colour_manual(values = colsn) + guides(color = "none")

c2mds = ce2Mds$points[, 1:2] |>
`colnames<-`(paste0("L2_PCo", 1:2)) |>
as_tibble()
ggplot(c2mds, aes(x = L2_PCo1, y = L2_PCo2, color = cellt)) +
geom_point(aes(color = cellt), alpha = 0.6) +
scale_colour_manual(values = colsn) + guides(color = "none")

## -----------------------------------------------------------------------------
## Hack from https://stackoverflow.com/questions/12041042/how-to-plot-just-the-legends-in-ggplot2
ggpcolor = ggplot(c1mds,aes(x=L1_PCo1,y=L1_PCo2, color = cellt)) +
geom_point(aes(color = cellt), alpha = 0.6) +
scale_colour_manual(values=colsn, name = "cell type")
g_legend = function(a) {
gt = ggplot_gtable(ggplot_build(a))
leg = which(sapply(gt$grobs, function(x) x$name) == "guide-box")
gt$grobs[[leg]]
}
grid.draw(g_legend(ggpcolor))

## -----------------------------------------------------------------------------
library("Rtsne")
restsne = Rtsne(blom, dims = 2, perplexity = 30, verbose = FALSE,
max_iter = 900)
dftsne = restsne$Y[, 1:2] |>
`colnames<-`(paste0("axis", 1:2)) |>
as_tibble()
ggplot(dftsne,aes(x = axis1, y = axis2, color = cellt)) +
geom_point(aes(color = cellt), alpha = 0.6) +
scale_color_manual(values = colsn) + guides(color = "none")

restsne3 = Rtsne(blom, dims = 3, perplexity = 30, verbose = FALSE,
max_iter = 900)
dftsne3 = restsne3$Y[, 1:3] |>
`colnames<-`(paste0("axis", 1:3)) |>
as_tibble()
ggplot(dftsne3,aes(x = axis3, y = axis2, group = cellt)) +
geom_point(aes(color = cellt), alpha = 0.6) +
scale_colour_manual(values = colsn) + guides(color = "none")

## -----------------------------------------------------------------------------
ukraine_tsne = Rtsne(ukraine_dists, is_distance = TRUE, perplexity = 8)
ukraine_tsne_df = tibble(
PCo1 = ukraine_tsne$Y[, 1],
PCo2 = ukraine_tsne$Y[, 2],
labs = attr(ukraine_dists, "Labels")
)
ggplot(ukraine_tsne_df, aes(x = PCo1, y = PCo2, label = labs)) +
geom_point() + geom_text_repel(col = "#0057b7") + coord_fixed()

## -----------------------------------------------------------------------------
library("genefilter")
load("../data/microbe.rda")
metab = read.csv("../data/metabolites.csv", row.names = 1) |> as.matrix()
dim(metab)
## [1] 487 12
## -----------------------------------------------------------------------------
library("phyloseq")
##
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
##
## distance
## The following object is masked from 'package:Biobase':
##
## sampleNames
## The following object is masked from 'package:GenomicRanges':
##
## distance
## The following object is masked from 'package:IRanges':
##
## distance
metab = metab[rowSums(metab == 0) <= 3, ]
microbe = prune_taxa(taxa_sums(microbe) > 4, microbe)
microbe = filter_taxa(microbe, filterfun(kOverA(3, 2)), TRUE)
metab = log(1 + metab, base = 10)
X = log(1 + as.matrix(otu_table(microbe)), base = 10)
## -----------------------------------------------------------------------------
colnames(metab) = colnames(X)
pca1 = dudi.pca(t(metab), scal = TRUE, scann = FALSE)
pca2 = dudi.pca(t(X), scal = TRUE, scann = FALSE)
rv1 = RV.rtest(pca1$tab, pca2$tab, 999)
rv1
## Monte-Carlo test
## Call: RV.rtest(df1 = pca1$tab, df2 = pca2$tab, nrepet = 999)
##
## Observation: 0.8400429
##
## Based on 999 replicates
## Simulated p-value: 0.001
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 6.403039478 0.309715288 0.006859875
## -----------------------------------------------------------------------------
library("PMA")
ccaRes = CCA(t(X), t(metab), penaltyx = 0.15, penaltyz = 0.15,
typex = "standard", typez = "standard")
## 123456789
ccaRes
## Call: CCA(x = t(X), z = t(metab), typex = "standard", typez = "standard",
## penaltyx = 0.15, penaltyz = 0.15)
##
##
## Num non-zeros u's: 5
## Num non-zeros v's: 16
## Type of x: standard
## Type of z: standard
## Penalty for x: L1 bound is 0.15
## Penalty for z: L1 bound is 0.15
## Cor(Xu,Zv): 0.9904707
## -----------------------------------------------------------------------------
combined = cbind(t(X[ccaRes$u != 0, ]),
t(metab[ccaRes$v != 0, ]))
pcaRes = dudi.pca(combined, scannf = FALSE, nf = 3)
# annotation
genotype = substr(rownames(pcaRes$li), 1, 2)
sampleType = substr(rownames(pcaRes$l1), 3, 4)
featureType = grepl("\\.", colnames(combined))
featureType = ifelse(featureType, "Metabolite", "OTU")
sampleInfo = data.frame(pcaRes$li, genotype, diet=sampleType)
featureInfo = data.frame(pcaRes$c1, feature = substr(colnames(combined), 1, 6))
## -----------------------------------------------------------------------------
ggplot() + geom_point(data = sampleInfo,
aes(x = Axis1, y = Axis2, col = diet, shape = genotype), size = 3) +
geom_label_repel(data = featureInfo,
aes(x = 5.5 * CS1, y = 5.5 * CS2, label = feature, fill = featureType),
size = 2, segment.size = 0.3,
label.padding = unit(0.1, "lines"), label.size = 0) +
geom_point(data = featureInfo,
aes(x = 5.5 * CS1, y = 5.5 * CS2, fill = featureType),
size = 1, shape = 23, col = "#383838") +
scale_color_brewer(palette = "Set2") +
scale_fill_manual(values = c("#a6d854", "#e78ac3")) +
guides(fill = guide_legend(override.aes = list(shape = 32, size = 0))) +
coord_fixed()+
labs(x = sprintf("Axis1 [%s%% Variance]",
100 * round(pcaRes$eig[1] / sum(pcaRes$eig), 2)),
y = sprintf("Axis2 [%s%% Variance]",
100 * round(pcaRes$eig[2] / sum(pcaRes$eig), 2)),
fill = "Feature Type", col = "Sample Type")
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## -----------------------------------------------------------------------------
ps1=readRDS("../data/ps1.rds")
ps1p=filter_taxa(ps1, function(x) sum(x) > 0, TRUE)
psCCpnA = ordinate(ps1p, "CCA",
formula = ps1p ~ ageBin + family_relationship)
## -----------------------------------------------------------------------------
library("dplyr")
tax = data.frame(tax_table(ps1p),stringsAsFactors = FALSE)
tax$seq = rownames(tax)
mainOrders = c("Clostridiales", "Bacteroidales",
"Lactobacillales", "Coriobacteriales")
tax$Order[!(tax$Order %in% mainOrders)] = "Other"
tax$Order = factor(tax$Order, levels = c(mainOrders, "Other"))
tax$otu_id = seq_len(ncol(otu_table(ps1p)))
scoresCCpnA = vegan::scores(psCCpnA)
sites = data.frame(scoresCCpnA$sites)
sites$SampleID = rownames(sites)
sites = left_join(sites, as(sample_data(ps1p), "data.frame"))
## Joining with `by = join_by(SampleID)`
species = data.frame(scoresCCpnA$species)
species$otu_id = seq_along(colnames(otu_table(ps1p)))
species = left_join(species, tax)
## Joining with `by = join_by(otu_id)`
## -----------------------------------------------------------------------------
evalProp = 100 * psCCpnA$CCA$eig[1:2] / sum(psCCpnA$CA$eig)
ggplot() +
geom_point(data = sites,aes(x =CCA2, y =CCA1),shape =2,alpha=0.5) +
geom_point(data = species,aes(x =CCA2,y =CCA1,col = Order),size=1)+
geom_text_repel(data = dplyr::filter(species, CCA2 < (-2)),
aes(x = CCA2, y = CCA1, label = otu_id),
size = 2, segment.size = 0.1) +
facet_grid(. ~ ageBin) +
guides(col = guide_legend(override.aes = list(size = 2))) +
labs(x = sprintf("Axis2 [%s%% variance]", round(evalProp[2])),
y = sprintf("Axis1 [%s%% variance]", round(evalProp[1]))) +
scale_color_brewer(palette = "Set1") + theme(legend.position="bottom")
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 9 unlabeled data points (too many overlaps). Consider increasing max.overlaps

## -----------------------------------------------------------------------------
ggplot() +
geom_point(data = sites, aes(x = CCA2, y = CCA1), shape = 2, alpha = 0.5) +
geom_point(data = species, aes(x = CCA2, y = CCA1, col = Order), size = 1) +
geom_text_repel(data = dplyr::filter(species, CCA2 < (-2)),
aes(x = CCA2, y = CCA1, label = otu_id),
size = 2, segment.size = 0.1) +
facet_grid(. ~ family_relationship) +
guides(col = guide_legend(override.aes = list(size = 2))) +
labs(x = sprintf("Axis2 [%s%% variance]", round(evalProp[2])),
y = sprintf("Axis1 [%s%% variance]", round(evalProp[1]))) +
scale_color_brewer(palette = "Set1") + theme(legend.position="bottom")
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 8 unlabeled data points (too many overlaps). Consider increasing max.overlaps

## -----------------------------------------------------------------------------
ibd.pres = ifelse(assayIBD[, 1:28] > 8.633, 1, 0)
ibd.pres[1:3,]
## 20CF 20DF 20MF 20PF CC CD CM CNTR23CF CNTR23DF
## 01010101000000.2104_gPM_GC 0 0 0 0 0 0 0 0 0
## 01010101000000.2141_gPM_GC 0 0 0 0 0 0 0 0 0
## 01010101000000.2147_pPM_GC 0 0 0 0 0 0 0 0 0
## CNTR23MF CNTR23PF CNTRCF CNTRDF CNTRMF CNTRPF CP
## 01010101000000.2104_gPM_GC 0 0 0 0 0 0 0
## 01010101000000.2141_gPM_GC 0 0 0 0 0 0 0
## 01010101000000.2147_pPM_GC 0 0 0 0 0 0 0
## IBS1_CntrCb IBS1_CntrPb IBS1_IBSCb IBS1_IBSPb IBSC
## 01010101000000.2104_gPM_GC 0 0 0 0 0
## 01010101000000.2141_gPM_GC 0 0 0 0 0
## 01010101000000.2147_pPM_GC 0 0 0 0 0
## IBSD IBSM IBSP IC ID IM IP
## 01010101000000.2104_gPM_GC 0 0 0 0 0 0 0
## 01010101000000.2141_gPM_GC 0 0 0 0 0 0 0
## 01010101000000.2147_pPM_GC 0 0 0 0 0 0 0
dim(ibd.pres)
## [1] 8634 28
## -----------------------------------------------------------------------------
IBDca = dudi.coa(ibd.pres, scannf = FALSE, nf = 4)
fviz_eig(IBDca, geom = "bar", bar_width = 0.7) +
ylab("Percentage of chisquare") + ggtitle("")

fviz(IBDca, element = "col", axes = c(1, 2), geom = "point",
habillage = day, palette = "Dark2", addEllipses = TRUE, color = day,
ellipse.type = "convex", alpha = 1, col.row.sup = "blue",
select = list(name = NULL, cos2 = NULL, contrib = NULL),
repel = TRUE)

## -----------------------------------------------------------------------------
d1 <- t(data.frame(
quiet = c(2770, 2150, 2140, 875, 1220, 821, 2510),
angry = c(2970, 1530, 1740, 752, 1040, 710, 1730),
clever = c(1650, 1270, 1320, 495, 693, 416, 1420),
depressed = c(1480, 957, 983, 147, 330, 102, 1270),
happy = c(19300, 8310, 8730, 1920, 4220, 2610, 9150),
lively = c(1840, 1250, 1350, 659, 621, 488, 1480),
perplexed = c(110, 71, 80, 19, 23, 15, 109),
virtuous = c(179, 80, 102, 20, 25, 17, 165)))
colnames(d1) <- c('black','blue','green','grey','orange','purple','white')
knitr::kable(d1, format='html')
|
|
black
|
blue
|
green
|
grey
|
orange
|
purple
|
white
|
|
quiet
|
2770
|
2150
|
2140
|
875
|
1220
|
821
|
2510
|
|
angry
|
2970
|
1530
|
1740
|
752
|
1040
|
710
|
1730
|
|
clever
|
1650
|
1270
|
1320
|
495
|
693
|
416
|
1420
|
|
depressed
|
1480
|
957
|
983
|
147
|
330
|
102
|
1270
|
|
happy
|
19300
|
8310
|
8730
|
1920
|
4220
|
2610
|
9150
|
|
lively
|
1840
|
1250
|
1350
|
659
|
621
|
488
|
1480
|
|
perplexed
|
110
|
71
|
80
|
19
|
23
|
15
|
109
|
|
virtuous
|
179
|
80
|
102
|
20
|
25
|
17
|
165
|
## -----------------------------------------------------------------------------
colorsentiment = read.csv("../data/colorsentiment.csv")
colsent = xtabs(colorsentiment[,3] ~ colorsentiment[,2] + colorsentiment[,1])
coldf = data.frame(unclass(colsent))
coldf = round(coldf / 1000)
# xtable::xtable(round(coldf),display=rep("d", 8))
colorfactor = names(coldf)
veganout = vegan::cca(coldf)
colorfactor[c(4,7)] = c("darkgrey", "grey")
ordiplot(veganout, scaling = 3, type = "none", xlim =c(-1.2, 0.75), ylim =c(-0.7, 1))
text(veganout, "sites", pch = 21, col = "red", bg = "yellow", scaling = 3)
text(veganout, "species", pch = 21, col = colorfactor, bg = "black", cex=1.2, scaling = 3)

## -----------------------------------------------------------------------------
platof = read.table("../data/platof.txt", header = TRUE)
platof[1:4, ]
## Rep Laws Crit Phil Pol Soph Tim
## uuuuu 42 91 5 24 13 26 18
## -uuuu 60 144 3 27 19 33 30
## u-uuu 64 72 3 20 24 31 46
## uu-uu 72 98 2 25 20 24 14
## -----------------------------------------------------------------------------
resPlato = dudi.coa(platof, scannf = FALSE, nf = 2)
fviz_ca_biplot(resPlato, axes=c(2, 1)) + ggtitle("")

fviz_eig(resPlato, geom = "bar", width = 0.6) + ggtitle("")

## -----------------------------------------------------------------------------
names(resPlato)
## [1] "tab" "cw" "lw" "eig" "rank" "nf" "c1" "li" "co" "l1"
## [11] "call" "N"
sum(resPlato$eig)
## [1] 0.132618
percentageInertia=round(100*cumsum(resPlato$eig)/sum(resPlato$eig))
percentageInertia
## [1] 69 85 92 96 98 100
percentageInertia[2]
## [1] 85
## -----------------------------------------------------------------------------
load("../data/lakes.RData")
lakelike[ 1:3, 1:8]
## plant1 plant2 plant3 plant4 plant5 plant6 plant7 plant8
## loc1 6 4 0 3 0 0 0 0
## loc2 4 5 5 3 4 2 0 0
## loc3 3 4 7 4 5 2 1 1
lakelikeh[1:3, 1:8]
## plant1 plant2 plant3 plant4 plant5 plant6 plant7 plant8
## loc1 6 4 0 3 0 0 0 0
## loc2 4 5 5 3 4 2 0 0
## loc3 3 4 7 4 5 2 1 1
e_coa = dudi.coa(lakelike, scannf = FALSE, nf = 2)
e_pca = dudi.pca(lakelike, scannf = FALSE, nf = 2)
eh_coa = dudi.coa(lakelikeh, scannf = FALSE, nf = 2)
eh_pca = dudi.pca(lakelikeh, scannf = FALSE, nf = 2)
## -----------------------------------------------------------------------------
scatter(e_pca)

scatter(e_coa)

s.label(e_pca$li)

s.label(e_coa$li)

s.label(eh_pca$co)

s.label(eh_pca$li)

s.label(eh_coa$li)

s.label(eh_coa$co)

## -----------------------------------------------------------------------------
moignard_raw = as.matrix(read.csv("../data/nbt.3154-S3-raw.csv", row.names = 1))
dist2r.euclid = dist(moignard_raw)
dist1r.l1 = dist(moignard_raw, "manhattan")
cells1.cmds = cmdscale(dist1r.l1, k = 20, eig = TRUE)
cells2.cmds = cmdscale(dist2r.euclid, k = 20, eig = TRUE)
sum(cells1.cmds$eig[1:2]) / sum(cells1.cmds$eig)
## [1] 0.776075
sum(cells2.cmds$eig[1:2]) / sum(cells2.cmds$eig)
## [1] 0.6297133
## -----------------------------------------------------------------------------
library("kernlab")
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:BiocGenerics':
##
## type
## The following object is masked from 'package:permute':
##
## how
## The following object is masked from 'package:ggplot2':
##
## alpha
laplacedot1 = laplacedot(sigma = 1/3934)
rbfdot1 = rbfdot(sigma = (1/3934)^2 )
Klaplace_cellsn = kernelMatrix(laplacedot1, blom)
KGauss_cellsn = kernelMatrix(rbfdot1, blom)
Klaplace_rawcells = kernelMatrix(laplacedot1, moignard_raw)
KGauss_rawcells = kernelMatrix(rbfdot1, moignard_raw)
## -----------------------------------------------------------------------------
dist1kr = 1 - Klaplace_rawcells
dist2kr = 1 - KGauss_rawcells
dist1kn = 1 - Klaplace_cellsn
dist2kn = 1 - KGauss_cellsn
cells1.kcmds = cmdscale(dist1kr, k = 20, eig = TRUE)
cells2.kcmds = cmdscale(dist2kr, k = 20, eig = TRUE)
percentage = function(x, n = 4) round(100 * sum(x[seq_len(n)]) / sum(x[x>0]))
kperc1 = percentage(cells1.kcmds$eig)
kperc2 = percentage(cells2.kcmds$eig)
cellsn1.kcmds = cmdscale(dist1kn, k = 20, eig = TRUE)
cellsn2.kcmds = cmdscale(dist2kn, k = 20, eig = TRUE)
## -----------------------------------------------------------------------------
colc = rowData(Moignard)$cellcol
library("scatterplot3d")
scatterplot3d(cellsn2.kcmds$points[, 1:3], color=colc, pch = 20,
xlab = "Axis k1", ylab = "Axis k2", zlab = "Axis k3", angle=15)

scatterplot3d(cellsn2.kcmds$points[, 1:3], color=colc, pch = 20,
xlab = "Axis k1", ylab = "Axis k2", zlab = "Axis k3", angle = -70)

## -----------------------------------------------------------------------------
library("rgl")
plot3d(cellsn2.kcmds$points[, 1:3], col = colc, size = 3,
xlab = "Axis1", ylab = "Axis2", zlab = "Axis3")
plot3d(cellsn2.kcmds$points[, c(1,2,4)], col = colc, size = 3,
xlab = "Axis1", ylab = "Axis2", zlab = "Axis4")
# Using an L1 distance instead.
plot3d(cellsn1.kcmds$points[, 1:3], col = colc, size = 3,
xlab = "Axis1", ylab = "Axis2", zlab = "Axis3")
plot3d(cellsn1.kcmds$points[, c(1,2,4)], col = colc, size = 3,
xlab = "Axis1", ylab = "Axis2", zlab = "Axis4")
## -----------------------------------------------------------------------------
library("LPCM")
##
## Attaching package: 'LPCM'
## The following object is masked from 'package:SummarizedExperiment':
##
## coverage
## The following object is masked from 'package:GenomicRanges':
##
## coverage
## The following object is masked from 'package:IRanges':
##
## coverage
library("diffusionMap")
dmap1 = diffuse(dist1n.l1, neigen = 10)
## Performing eigendecomposition
## Computing Diffusion Coordinates
## Elapsed time: 7.71 seconds
combs = combn(4, 3)
lpcplots = apply(combs, 2, function(j) lpc(dmap1$X[, j], scale = FALSE))
## -----------------------------------------------------------------------------
library("rgl")
for (i in seq_along(lpcplots))
plot(lpcplots[[i]], type = "l", lwd = 3,
xlab = paste("Axis", combs[1, i]),
ylab = paste("Axis", combs[2, i]),
zlab = paste("Axis", combs[3, i]))




## -----------------------------------------------------------------------------
outlpce134 = lpc(dmap1$X[,c(1,3,4)], scale=FALSE, h=0.5)
plot3d(dmap1$X[,c(1,3,4)], col=colc, pch=20,
xlab="Axis1", ylab="Axis3", zlab="Axis4")
plot3d(outlpce134$LPC, type="l", lwd=7, add=TRUE)
outlpce134 = lpc(dmap1$X[,c(1,3,4)], scale=FALSE, h=0.7)
plot3d(outlpce134$LPC, type="l", lwd=7,
xlab="Axis1", ylab="Axis3", zlab="Axis4")
plot3d(dmap1$X[,c(1,3,4)], col=colc,
xlab="", ylab="", zlab="", add=TRUE)
## -----------------------------------------------------------------------------
library("diffusionMap")
dmap2 = diffuse(dist2n.euclid, neigen = 11)
## Performing eigendecomposition
## Computing Diffusion Coordinates
## Elapsed time: 9.69 seconds
dmap1 = diffuse(dist1n.l1, neigen = 11)
## Performing eigendecomposition
## Computing Diffusion Coordinates
## Elapsed time: 7.6 seconds
plot(dmap2)

## -----------------------------------------------------------------------------
library("scatterplot3d")
scp3d = function(axestop = 1:3, dmapRes = dmap1, color = colc,
anglea = 20, pch = 20)
scatterplot3d(dmapRes$X[, axestop], color = colc,
xlab = paste("Axis",axestop[1]), ylab = paste("Axis", axestop[2]),
zlab = paste("Axis",axestop[3]), pch = pch, angle = anglea)
## -----------------------------------------------------------------------------
# TODO: duplicate of the below as a workaround;
# else white space is rendered between lines
scp3d()

scp3d(anglea=310)

scp3d(anglea=210)

scp3d(anglea=150)

## -----------------------------------------------------------------------------
## scp3d()
## scp3d(anglea=310)
## scp3d(anglea=210)
## scp3d(anglea=150)
## -----------------------------------------------------------------------------
# interactive plot
library("rgl")
plot3d(dmap1$X[,1:3], col=colc, size=3)
plot3d(dmap1$X[,2:4], col=colc, size=3)